
/arch/ia64/sn/kernel/bte_error.c

https://bitbucket.org/evzijst/gittest
C | 198 lines | 118 code | 24 blank | 56 comment | 19 complexity
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
 */

#include <linux/types.h>
#include <asm/sn/sn_sal.h>
#include "ioerror.h"
#include <asm/sn/addrs.h>
#include <asm/sn/shubio.h>
#include <asm/sn/geo.h>
#include "xtalk/xwidgetdev.h"
#include "xtalk/hubdev.h"
#include <asm/sn/bte.h>
#include <asm/param.h>

/*
 * BTE error handling is done in two parts.  The first part captures
 * any CRB related errors.  Since there can be multiple CRBs per
 * interface and multiple interfaces active, the second part must wait
 * until all active CRBs are completed.  Once all BTE related CRBs have
 * cleanly completed, it resets the interfaces and gets them ready for
 * new transfers to be queued.
 */
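/*
 * In this file, bte_crb_error_handler() is the first part (it records
 * the per-CRB error against the affected interface) and
 * bte_error_handler() is the second part (it waits for outstanding
 * BTE CRBs and then resets the interfaces).
 */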

void bte_error_handler(unsigned long);

/*
 * Wait until all BTE related CRBs are completed
 * and then reset the interfaces.
 */
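/*
 * Note: this routine also doubles as the bte_recovery_timer callback
 * (presumably wired up when the node's BTE state is initialized), which
 * is why the nodepda pointer is passed as an unsigned long.  When BTE
 * CRBs are still outstanding it re-arms that timer via mod_timer()
 * below and retries later; once recovery completes it cancels the
 * timer with del_timer().
 */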
void bte_error_handler(unsigned long _nodepda)
{
	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
	spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
	nasid_t nasid;
	int i;
	int valid_crbs;
	unsigned long irq_flags;
	volatile u64 *notify;
	bte_result_t bh_error;
	ii_imem_u_t imem;	/* II IMEM Register */
	ii_icrb0_d_u_t icrbd;	/* II CRB Register D */
	ii_ibcr_u_t ibcr;
	ii_icmr_u_t icmr;
	ii_ieclr_u_t ieclr;

	BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda,
		    smp_processor_id()));

	spin_lock_irqsave(recovery_lock, irq_flags);

	if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) &&
	    (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
		BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
			    smp_processor_id()));
		spin_unlock_irqrestore(recovery_lock, irq_flags);
		return;
	}
	/*
	 * Lock all interfaces on this node to prevent new transfers
	 * from being queued.
	 */
	for (i = 0; i < BTES_PER_NODE; i++) {
		if (err_nodepda->bte_if[i].cleanup_active) {
			continue;
		}
		spin_lock(&err_nodepda->bte_if[i].spinlock);
		BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda,
			    smp_processor_id(), i));
		err_nodepda->bte_if[i].cleanup_active = 1;
	}

	/* Determine information about our hub */
	nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);

	/*
	 * A BTE transfer can use multiple CRBs.  We need to make sure
	 * that all the BTE CRBs are complete (or timed out) before
	 * attempting to clean up the error.  Resetting the BTE while
	 * there are still BTE CRBs active will hang the BTE.
	 * We look at all the CRBs to see if they are allocated
	 * to the BTE and whether they are still active.  When none
	 * are active, we can continue with the cleanup.
	 *
	 * We also want to make sure that the local NI port is up.
	 * When a router resets, the NI port can go down while it
	 * goes through the LLP handshake, but it then comes back up.
	 */
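	/*
	 * The ICMR register tracks the hub's CRB state: i_crb_mark is
	 * non-zero while CRBs are still marked for error handling by
	 * hubiio_crb_error_handler, and i_crb_vld is a bit vector, one
	 * bit per CRB, of the CRBs that are currently allocated and so
	 * still need to be checked for BTE ownership.
	 */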
	icmr.ii_icmr_regval = REMOTE_HUB_L(nasid, IIO_ICMR);
	if (icmr.ii_icmr_fld_s.i_crb_mark != 0) {
		/*
		 * There are errors which still need to be cleaned up by
		 * hubiio_crb_error_handler
		 */
		mod_timer(recovery_timer, jiffies + (HZ * 5));
		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
			    smp_processor_id()));
		spin_unlock_irqrestore(recovery_lock, irq_flags);
		return;
	}
	if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {

		valid_crbs = icmr.ii_icmr_fld_s.i_crb_vld;

		for (i = 0; i < IIO_NUM_CRBS; i++) {
			if (!((1 << i) & valid_crbs)) {
				/* This crb was not marked as valid, ignore */
				continue;
			}
			icrbd.ii_icrb0_d_regval =
			    REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
			if (icrbd.d_bteop) {
				mod_timer(recovery_timer, jiffies + (HZ * 5));
				BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
					    err_nodepda, smp_processor_id(),
					    i));
				spin_unlock_irqrestore(recovery_lock,
						       irq_flags);
				return;
			}
		}
	}

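	/*
	 * At this point no CRBs are marked and none of the valid CRBs
	 * belong to a BTE operation, so the BTE hardware can be reset
	 * without hanging it (see the comment above).
	 */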
	BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id()));
	/* Reenable both bte interfaces */
	imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM);
	imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1;
	REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval);

	/* Clear BTE0/1 error bits */
	ieclr.ii_ieclr_regval = 0;
	if (err_nodepda->bte_if[0].bh_error != BTE_SUCCESS)
		ieclr.ii_ieclr_fld_s.i_e_bte_0 = 1;
	if (err_nodepda->bte_if[1].bh_error != BTE_SUCCESS)
		ieclr.ii_ieclr_fld_s.i_e_bte_1 = 1;
	REMOTE_HUB_S(nasid, IIO_IECLR, ieclr.ii_ieclr_regval);

	/* Reinitialize both BTE state machines. */
	ibcr.ii_ibcr_regval = REMOTE_HUB_L(nasid, IIO_IBCR);
	ibcr.ii_ibcr_fld_s.i_soft_reset = 1;
	REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);

	for (i = 0; i < BTES_PER_NODE; i++) {
		bh_error = err_nodepda->bte_if[i].bh_error;
		if (bh_error != BTE_SUCCESS) {
			/* There is an error which needs to be notified */
			notify = err_nodepda->bte_if[i].most_rcnt_na;
			BTE_PRINTK(("cnode %d bte %d error=0x%lx\n",
				    err_nodepda->bte_if[i].bte_cnode,
				    err_nodepda->bte_if[i].bte_num,
				    IBLS_ERROR | (u64) bh_error));
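			/*
			 * most_rcnt_na points at the notification word
			 * that the issuer of the transfer is watching;
			 * writing IBLS_ERROR together with the error
			 * code tells it the transfer failed and why.
			 */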
			*notify = IBLS_ERROR | bh_error;
			err_nodepda->bte_if[i].bh_error = BTE_SUCCESS;
		}

		err_nodepda->bte_if[i].cleanup_active = 0;
		BTE_PRINTK(("eh:%p:%d Unlocked %d\n", err_nodepda,
			    smp_processor_id(), i));
		spin_unlock(&err_nodepda->bte_if[i].spinlock);
	}

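	/*
	 * An earlier pass may have scheduled a retry of this handler
	 * through mod_timer(); recovery is now complete, so cancel it.
	 */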
	del_timer(recovery_timer);

	spin_unlock_irqrestore(recovery_lock, irq_flags);
}

/*
 * First part error handler.  This is called whenever any error CRB interrupt
 * is generated by the II.
 */
void
bte_crb_error_handler(cnodeid_t cnode, int btenum,
                      int crbnum, ioerror_t *ioe, int bteop)
{
	struct bteinfo_s *bte;

	bte = &(NODEPDA(cnode)->bte_if[btenum]);

	/*
	 * The caller has already figured out the error type; we save it
	 * in the bte handle structure for the thread exercising the
	 * interface to consume.
	 */
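	/*
	 * ie_errortype is the raw hardware CRB error code; adding
	 * BTEFAIL_OFFSET maps it into the bte_result_t failure codes so
	 * the consumer of bh_error can distinguish which hardware error
	 * occurred.
	 */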
	bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET;
	bte->bte_error_count++;

	BTE_PRINTK(("Got an error on cnode %d bte %d: HW error type 0x%x\n",
		bte->bte_cnode, bte->bte_num, ioe->ie_errortype));
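	/*
	 * Kick off the second part of recovery right away; if BTE CRBs
	 * are still outstanding it will reschedule itself via the
	 * recovery timer.
	 */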
	bte_error_handler((unsigned long) NODEPDA(cnode));
}