/*
 * Copyright (c) 1982, 1986, 1990 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)kern_physio.c	7.20 (Berkeley) 5/11/91
 *	kern_physio.c,v 1.6 1993/07/08 10:53:40 cgd Exp
 */

#include "param.h"
#include "systm.h"
#include "buf.h"
#include "conf.h"
#include "proc.h"
/* #include "seg.h" XXX - cgd */
#include "trace.h"
/* #include "map.h" XXX - cgd */
#include "vnode.h"
#include "specdev.h"

#ifdef HPUXCOMPAT
#include "user.h"
#endif

#include "malloc.h"	/* XXX -- i had to add this, so it could very
			 * well be wrong... -- cgd
			 */
/*
 * physio: do raw device I/O for a user process, transferring directly
 * between the device and the memory described by a uio.
 *
 * Parameters:
 *	strat	strategy routine; called once per chunk to start the transfer.
 *	bp	buffer header to use.  If NULL, one is malloc'd (M_TEMP) for
 *		the duration of the call and freed before returning.
 *	dev	device; stored in bp->b_dev for every chunk.
 *	rw	B_READ or B_WRITE; OR'd into bp->b_flags and used (instead
 *		of anything in the uio) to decide the transfer direction.
 *	mincnt	called with bp to bound the size of each chunk (rawread()
 *		and rawwrite() pass minphys); its return value is the byte
 *		count for the chunk, and a return of 0 ends the transfer.
 *	uio	describes the user memory.  uio_offset and uio_resid, and
 *		the iov_base/iov_len of each iovec, are advanced in place
 *		as data is transferred.
 *
 * Returns EFAULT if useracc() rejects any iovec (nothing is transferred),
 * 0 if mincnt returns 0 for a chunk, and otherwise the value returned by
 * the last biowait() call (0 when no iovec had data to transfer).
 *
 * For each chunk the user pages are locked with vslock() and mapped with
 * vmapbuf() before the strategy routine is called, and released with
 * vunmapbuf()/vsunlock() once the buffer is marked B_DONE.
 */
int
physio(strat, bp, dev, rw, mincnt, uio)
	int (*strat)(); 
	register struct buf *bp;
	dev_t dev;
	int rw;
	u_int (*mincnt)();
	struct uio *uio;
{
	/*
	 * Body deleted
	 *
	 * And reimplemented by cgd.
	 * comments in brackets are my own, the rest come from
	 * _The Design and Implementation of the 4.3 BSD UNIX Operating System_
	 * by Leffler, McKusick, et al., page 232
	 *
	 * also, parts of this snarfed from wfj's physio, but *it*
	 * was mostly ick.
	 */

	/*
	 * s holds the value returned by splbio() so it can be handed back
	 * to splx(); i indexes the iovec array; nobuf is nonzero when the
	 * caller passed no buffer and we allocate one ourselves; wanted is
	 * only used on the caller-supplied-buffer cleanup path.
	 */
	int s, i;
	int error, wanted,
	    nobuf = (bp == NULL);

	if (nobuf) { /* [ if we have no buffer, we need one... ] */
		/* [ so malloc it...  XXX? ]
		 * The result is used without a NULL check; the request is
		 * made with M_WAITOK.  The header is zeroed, so it starts
		 * out with no flags set.
		 */
		bp = (struct buf *)malloc(sizeof(*bp), M_TEMP, M_WAITOK);
		bzero(bp, sizeof(*bp));
	}

	/* check user read/write access to the data buffer
	 * [ and if no access, then return EFAULT ]
	 */
	for (i = 0; i < uio->uio_iovcnt; i++) {
		/* [ check each iov one by one.  note that we're
		 *   given an rw param, so we ignore the uio's
		 *   rw parameter...  also note that if we're
		 *   doing a read, that's a *write* to user-space... ]
		 */
		if (!useracc(uio->uio_iov[i].iov_base,
			     uio->uio_iov[i].iov_len,
			     (rw == B_READ) ? B_WRITE : B_READ)) {
			/* [ only a buffer we allocated needs releasing;
			 *   a caller-supplied one has not been touched yet ]
			 */
			if (nobuf)
				free(bp, M_TEMP);
			return EFAULT;
		}
	}

	s = splbio();		/* raise the processor priority to splbio */

	/* while (the buffer is marked busy) { */
	while (bp->b_flags & B_BUSY) {
		bp->b_flags |= B_WANTED;	/* mark the buffer wanted */
		/* wait until the buffer is available
		 * [ the sleep channel is bp itself, the same channel the
		 *   I/O-completion wait below sleeps on ]
		 */
		tsleep((caddr_t) bp, PRIBIO+1, "physbuf", 0);
	} /* } */

	/* [ mark it as busy so it's not reused by somebody else ] */
	bp->b_flags |= B_BUSY;

	splx(s);				/* lower the priority level */

	error = 0;

	/* [ for each element of the iov; stop as soon as error is set ] */
	for (i = 0; i < uio->uio_iovcnt && !error; i++) {
		struct iovec *iovp;
		iovp = &uio->uio_iov[i];

		/* set up the fixed part of the buffer for a transfer */
		/* [ also, clear error flag, but that's done later ] */
		bp->b_error = 0;		/* no error yet */
		bp->b_proc = curproc;		/* on behalf of this process */

		/* while (there are data to transfer and no I/O error) { */
		while (iovp->iov_len > 0 && !error) {
			/* todo: bytes requested for this chunk (after
			 * mincnt); done: bytes actually transferred;
			 * tmp: user address saved across vmapbuf().
			 */
			int todo, done;
			caddr_t tmp;

			/* mark the buffer busy for physical I/O
			 * [ this is a plain assignment, so every other flag
			 *   bit left from the previous chunk is dropped.
			 *   NOTE(review): that includes a B_WANTED set by a
			 *   waiter in the loop above -- confirm such waiters
			 *   are always re-woken and re-check B_BUSY. ]
			 */
			bp->b_flags = B_BUSY | B_PHYS | rw;

			/* set up the buffer for a maximum-sized transfer
			 * [ the block number is recomputed from the current
			 *   uio offset for every chunk ]
			 */
			bp->b_dev = dev;
			bp->b_blkno = btodb(uio->uio_offset);
			bp->b_bcount = iovp->iov_len;
			/* [ base of buffer is iov's ] */
			bp->b_un.b_addr = iovp->iov_base;

			/* call minphys [actually mincnt] to bound the transfer size */
			todo = (*mincnt)(bp);

			/* [ and if it returns zero, e.g. in the "end of disk"
			 *   case, bail... error is still 0 here, so the
			 *   caller sees success with the uio reflecting
			 *   whatever was transferred so far ] */
			if (todo == 0)
				goto leave;

			/* lock the part of the user address space involved in
			 *   the transfer
			 * [ vmapbuf clobbers the b_addr, so save it first ]
			 */
			tmp = bp->b_un.b_addr;
			vslock(bp->b_un.b_addr, todo);
			vmapbuf(bp);

			/* call strategy to start the transfer
			 * [ some of the next bit snarfed from wfj's machdep.c ]
			 * [ the strategy routine's return value is ignored ]
			 */
			(*strat)(bp);

			s = splbio();	/* raise the priority level to splbio */
			/* wait for the transfer to complete
			 * [ i.e. until B_DONE appears in b_flags; presumably
			 *   set by the I/O completion path, which is not
			 *   visible in this file ]
			 */
			while ((bp->b_flags & B_DONE) == 0)
				tsleep((caddr_t) bp, PRIBIO, "physio", 0);

			/* unlock the part of the address space previously locked
			 * [ using the address saved in tmp before vmapbuf() ]
			 */
			vunmapbuf(bp);
			vsunlock(tmp, todo, 0); /* [ 3rd param unused!!! ] */

			splx(s); /* lower the priority level */

			/* deduct the transfer size from the total number
			 *   of data to transfer
			 * [ done is the chunk size minus the residual left
			 *   in b_resid; the iovec and the uio are advanced
			 *   by that amount ]
			 */
			done = bp->b_bcount - bp->b_resid;
			iovp->iov_len -= done;
			iovp->iov_base += done;
			uio->uio_offset += done;
			uio->uio_resid -= done;

			/* [ set error from the buffer's error code, and
			 *   do other miscellaneous cleanup on the buffer ]
			 */
			/* [ XXX this B_INVAL trick is a workaround for a
			 *   broken vfs_bio, which wants to rehash a buf on
			 *   error unless the buf is already B_INVAL.  It is
			 *   only applied to a buffer we allocated. ]
			 */
			if (nobuf && ((bp->b_flags & B_ERROR) || bp->b_error))
				bp->b_flags |= B_INVAL; /* XXX */
			/* [ B_DONE was already observed above, so this call
			 *   is presumably only collecting the error code --
			 *   biowait() is defined elsewhere ]
			 */
			error = biowait(bp);
			/* [ handle disks like they want to be handled:
			 *   an error flag or a short transfer ends the
			 *   whole request, not just this iovec ]
			 */
			if (bp->b_flags & B_ERROR || done < todo)
				goto leave;
		} /* } */
	}

leave:
	if (nobuf) {	/* [ if we had to allocate it, get rid of it ] */
		if (bp->b_vp) /* [ have a vnode; dissociate from it ] */
			brelvp(bp);
		/* XXX any other buf fields which should be taken care of
		 * if they had values?
		 */
		free(bp, M_TEMP);
	} else {
		/* clean up the state of the buffer
		 * [ remember B_WANTED before clearing it.  B_RAW is cleared
		 *   here although nothing in this function sets it. ]
		 */
		wanted = bp->b_flags & B_WANTED;
		bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_RAW);

		/* if (another process is waiting for the raw I/O buffer) */
		if (wanted)
			/* wake up process waiting to do physical I/O */
			wakeup((caddr_t) bp);
	}

	/* [ finally, if there's an error, return it ] */
	return error;
}

/*
 * Bound the size of a single raw I/O request.
 *
 * bp->b_bcount is replaced by min(MAXPHYS, bp->b_bcount) and the new
 * count is returned.  rawread() and rawwrite() hand this routine to
 * physio() as its mincnt argument, so physio() calls it once per chunk
 * before starting the transfer.
 *
 * The cap is meant to keep one process from locking down more than a
 * fixed amount of memory for a single operation.
 */
u_int
minphys(bp)
	struct buf *bp;
{

	/*
	 * Original body deleted; reimplemented by cgd after Leffler,
	 * McKusick, et al., p. 231: "The minphys() routine is called by
	 * physio() to adjust the size of each I/O transfer before the
	 * latter is passed to the strategy routine..."
	 *
	 * Clamp the count in place and hand the clamped value back in a
	 * single step.
	 */
	return (bp->b_bcount = min(MAXPHYS, bp->b_bcount));
}

/*
 * Raw read entry point: read from a device for a user process.
 *
 * Looks up the strategy routine for dev's major number in cdevsw and
 * passes it to physio() with direction B_READ, minphys as the size
 * limiter, and no buffer header (physio() then allocates its own).
 * Returns whatever physio() returns.
 */
int
rawread(dev, uio)
	dev_t dev;
	struct uio *uio;
{
	/* no caller-owned buffer header; physio() treats NULL as "allocate" */
	struct buf *nobuf = (struct buf *)NULL;

	return physio(cdevsw[major(dev)].d_strategy, nobuf, dev, B_READ,
	    minphys, uio);
}

/*
 * Raw write entry point: write to a device for a user process.
 *
 * Looks up the strategy routine for dev's major number in cdevsw and
 * passes it to physio() with direction B_WRITE, minphys as the size
 * limiter, and no buffer header (physio() then allocates its own).
 * Returns whatever physio() returns.
 */
int
rawwrite(dev, uio)
	dev_t dev;
	struct uio *uio;
{
	/* no caller-owned buffer header; physio() treats NULL as "allocate" */
	struct buf *nobuf = (struct buf *)NULL;

	return physio(cdevsw[major(dev)].d_strategy, nobuf, dev, B_WRITE,
	    minphys, uio);
}
