diff -upr linux-2.6.16.46-0.12.orig/COPYING.SWsoft linux-2.6.16.46-0.12-027test011/COPYING.SWsoft
--- linux-2.6.16.46-0.12.orig/COPYING.SWsoft	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/COPYING.SWsoft	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,350 @@
+
+Nothing in this license should be construed as a grant by SWsoft of any rights
+beyond the rights specified in the GNU General Public License, and nothing in
+this license should be construed as a waiver by SWsoft of its patent, copyright
+and/or trademark rights, beyond the waiver required by the GNU General Public
+License. This license is expressly inapplicable to any product that is not
+within the scope of the GNU General Public License
+
+----------------------------------------
+
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff -upr linux-2.6.16.46-0.12.orig/Documentation/filesystems/Locking linux-2.6.16.46-0.12-027test011/Documentation/filesystems/Locking
--- linux-2.6.16.46-0.12.orig/Documentation/filesystems/Locking	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/Documentation/filesystems/Locking	2007-08-28 17:35:30.000000000 +0400
@@ -170,6 +170,7 @@ prototypes:
 	int (*releasepage) (struct page *, int);
 	int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs);
+	int (*launder_page) (struct page *);
 
 locking rules:
 	All except set_page_dirty may block
@@ -187,6 +188,7 @@ bmap:			yes
 invalidatepage:		no	yes
 releasepage:		no	yes
 direct_IO:		no
+launder_page:		no	yes
 
 	->prepare_write(), ->commit_write(), ->sync_page() and ->readpage()
 may be called from the request handler (/dev/loop).
@@ -280,6 +282,12 @@ buffers from the page in preparation for
 indicate that the buffers are (or may be) freeable.  If ->releasepage is zero,
 the kernel assumes that the fs has no private interest in the buffers.
 
+	->launder_page() may be called prior to releasing a page if
+it is still found to be dirty. It returns zero if the page was successfully
+cleaned, or an error value if not. Note that in order to prevent the page
+getting mapped back in and redirtied, it needs to be kept locked
+across the entire operation.
+
 	Note: currently almost all instances of address_space methods are
 using BKL for internal serialization and that's one of the worst sources
 of contention. Normally they are calling library functions (in fs/buffer.c)
diff -upr linux-2.6.16.46-0.12.orig/Documentation/vsched.txt linux-2.6.16.46-0.12-027test011/Documentation/vsched.txt
--- linux-2.6.16.46-0.12.orig/Documentation/vsched.txt	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/Documentation/vsched.txt	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,83 @@
+Copyright (C) 2005 SWsoft. All rights reserved.
+Licensing governed by "linux/COPYING.SWsoft" file.
+
+Hierarchical CPU schedulers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A hierarchical CPU scheduler is a stack of CPU schedulers which allows
+different scheduling policies to be organized in the system and/or between
+groups of processes.
+
+Virtuozzo uses a hierarchical Fair CPU scheduler organized as a 2-stage
+CPU scheduler, where the scheduling decisions are made in 2 steps:
+1. On the first step Fair CPU scheduler selects a group of processes
+  which should get some CPU time.
+2. Then standard Linux scheduler chooses a process inside the group.
+Such a scheduler efficiently isolates one group of processes from
+another and still allows a group to use more than 1 CPU on SMP systems.
+
+This document describes a new middle layer of Virtuozzo hierarchical CPU
+scheduler which makes decisions after Fair scheduler, but before Linux
+scheduler and which is called VCPU scheduler.
+
+
+Where VCPU scheduler comes from?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Existing hierarchical CPU scheduler uses isolated algorithms on each stage
+of decision making, i.e. every scheduler makes its decisions without
+taking into account the details of other schedulers. This can lead to a number
+of problems described below.
+
+On SMP systems there are possible situations when the first CPU scheduler
+in the hierarchy (e.g. Fair scheduler) wants to schedule some group of
+processes on the physical CPU, but the underlying process scheduler
+(e.g. Linux O(1) CPU scheduler) is unable to schedule any processes
+on this physical CPU. Usually this happens due to the fact that Linux
+kernel scheduler uses per-physical CPU runqueues.
+
+Another problem is that Linux scheduler also knows nothing about
+Fair scheduler and can't balance efficiently without taking into account
+statistics about process groups from Fair scheduler. Without such
+statistics Linux scheduler can concentrate all processes on one physical
+CPU, thus making CPU usage highly inefficient.
+
+VCPU scheduler solves these problems by adding a new layer between
+Fair scheduler and Linux scheduler.
+
+VCPU scheduler
+~~~~~~~~~~~~~~
+
+VCPU scheduler is a CPU scheduler which separates the notions of
+physical and virtual CPUs (PCPU and VCPU). This means that tasks are
+running on virtual CPU runqueues, while VCPUs are running on PCPUs.
+
+The Virtuozzo hierarchical fair scheduler becomes a 3-stage CPU scheduler:
+1. First, Fair CPU scheduler selects a group of processes.
+2. Then VCPU scheduler selects a virtual CPU to run (this is actually
+  a runqueue).
+3. Standard Linux scheduler chooses a process from the runqueue.
+
+For example, in the picture below PCPU0 executes tasks from
+the VCPU1 runqueue and PCPU1 is idle:
+
+   virtual          |         physical       |          virtual
+  idle CPUs         |           CPUs         |           CPUS
+--------------------|------------------------|--------------------------
+                    |                        |     -----------------
+                    |                        |    | virtual sched X |
+                    |                        |    |   -----------   |
+                    |                        |    |  |   VCPU0   |  |
+                    |                        |    |   -----------   |
+ ------------       |        -----------          |   -----------   |
+| idle VCPU0 |      |       |   PCPU0   |  <--->  |  |   VCPU1   |  |
+ ------------       |        -----------          |   -----------   |
+                    |                        |     -----------------
+                    |                        |
+                    |                        |     -----------------
+                    |                        |    | virtual sched Y |
+ ------------                -----------     |    |   -----------   |
+| idle VCPU1 |    <--->     |   PCPU1   |    |    |  |   VCPU0   |  |
+ ------------                -----------     |    |   -----------   |
+                    |                        |     -----------------
+                    |                        |
diff -upr linux-2.6.16.46-0.12.orig/Makefile linux-2.6.16.46-0.12-027test011/Makefile
--- linux-2.6.16.46-0.12.orig/Makefile	2007-08-28 17:35:36.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/Makefile	2007-08-28 17:35:36.000000000 +0400
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 16
-EXTRAVERSION = .46
+EXTRAVERSION = -027test011
 NAME=Stable Penguin
 
 # *DOCUMENTATION*
diff -upr linux-2.6.16.46-0.12.orig/arch/alpha/kernel/osf_sys.c linux-2.6.16.46-0.12-027test011/arch/alpha/kernel/osf_sys.c
--- linux-2.6.16.46-0.12.orig/arch/alpha/kernel/osf_sys.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/alpha/kernel/osf_sys.c	2007-08-28 17:35:30.000000000 +0400
@@ -960,7 +960,7 @@ osf_utimes(char __user *filename, struct
 			return -EFAULT;
 	}
 
-	return do_utimes(AT_FDCWD, filename, tvs ? ktvs : NULL);
+	return do_utimes(AT_FDCWD, filename, tvs ? ktvs : NULL, 0);
 }
 
 #define MAX_SELECT_SECONDS \
diff -upr linux-2.6.16.46-0.12.orig/arch/arm/kernel/smp.c linux-2.6.16.46-0.12-027test011/arch/arm/kernel/smp.c
--- linux-2.6.16.46-0.12.orig/arch/arm/kernel/smp.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/arm/kernel/smp.c	2007-08-28 17:35:31.000000000 +0400
@@ -197,7 +197,7 @@ int __cpuexit __cpu_disable(void)
 	local_flush_tlb_all();
 
 	read_lock(&tasklist_lock);
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (p->mm)
 			cpu_clear(cpu, p->mm->cpu_vm_mask);
 	}
diff -upr linux-2.6.16.46-0.12.orig/arch/frv/mm/mmu-context.c linux-2.6.16.46-0.12-027test011/arch/frv/mm/mmu-context.c
--- linux-2.6.16.46-0.12.orig/arch/frv/mm/mmu-context.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/frv/mm/mmu-context.c	2007-08-28 17:35:31.000000000 +0400
@@ -181,7 +181,7 @@ int cxn_pin_by_pid(pid_t pid)
 
 	/* get a handle on the mm_struct */
 	read_lock(&tasklist_lock);
-	tsk = find_task_by_pid(pid);
+	tsk = find_task_by_pid_ve(pid);
 	if (tsk) {
 		ret = -EINVAL;
 
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/Kconfig linux-2.6.16.46-0.12-027test011/arch/i386/Kconfig
--- linux-2.6.16.46-0.12.orig/arch/i386/Kconfig	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/Kconfig	2007-08-28 17:35:34.000000000 +0400
@@ -239,6 +239,8 @@ config NR_CPUS
 	  This is purely to save memory - each supported CPU adds
 	  approximately eight kilobytes to the kernel image.
 
+source "kernel/Kconfig.fairsched"
+
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on SMP && !X86_XEN
@@ -1153,6 +1155,8 @@ endmenu
 
 source "arch/i386/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
@@ -1161,6 +1165,8 @@ source "drivers/xen/Kconfig"
 
 source "lib/Kconfig"
 
+source "kernel/ub/Kconfig"
+
 #
 # Use the generic interrupt handling code in kernel/irq/:
 #
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c	2007-08-24 19:28:06.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c	2007-08-28 17:35:30.000000000 +0400
@@ -64,7 +64,7 @@ static int cpufreq_p4_setdc(unsigned int
 	if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV))
 		return -EINVAL;
 
-	rdmsr(MSR_IA32_THERM_STATUS, l, h);
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
 
 	if (l & 0x01)
 		dprintk("CPU#%d currently thermal throttled\n", cpu);
@@ -72,10 +72,10 @@ static int cpufreq_p4_setdc(unsigned int
 	if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT))
 		newstate = DC_38PT;
 
-	rdmsr(MSR_IA32_THERM_CONTROL, l, h);
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
 	if (newstate == DC_DISABLE) {
 		dprintk("CPU#%d disabling modulation\n", cpu);
-		wrmsr(MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
+		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
 	} else {
 		dprintk("CPU#%d setting duty cycle to %d%%\n",
 			cpu, ((125 * newstate) / 10));
@@ -86,7 +86,7 @@ static int cpufreq_p4_setdc(unsigned int
 		 */
 		l = (l & ~14);
 		l = l | (1<<4) | ((newstate & 0x7)<<1);
-		wrmsr(MSR_IA32_THERM_CONTROL, l, h);
+		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h);
 	}
 
 	return 0;
@@ -113,7 +113,6 @@ static int cpufreq_p4_target(struct cpuf
 {
 	unsigned int    newstate = DC_RESV;
 	struct cpufreq_freqs freqs;
-	cpumask_t cpus_allowed;
 	int i;
 
 	if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate))
@@ -134,17 +133,8 @@ static int cpufreq_p4_target(struct cpuf
 	/* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
 	 * Developer's Manual, Volume 3 
 	 */
-	cpus_allowed = current->cpus_allowed;
-
-	for_each_cpu_mask(i, policy->cpus) {
-		cpumask_t this_cpu = cpumask_of_cpu(i);
-
-		set_cpus_allowed(current, this_cpu);
-		BUG_ON(smp_processor_id() != i);
-
+	for_each_cpu_mask(i, policy->cpus)
 		cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
-	}
-	set_cpus_allowed(current, cpus_allowed);
 
 	/* notifiers */
 	for_each_cpu_mask(i, policy->cpus) {
@@ -268,17 +258,9 @@ static int cpufreq_p4_cpu_exit(struct cp
 
 static unsigned int cpufreq_p4_get(unsigned int cpu)
 {
-	cpumask_t cpus_allowed;
 	u32 l, h;
 
-	cpus_allowed = current->cpus_allowed;
-
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	BUG_ON(smp_processor_id() != cpu);
-
-	rdmsr(MSR_IA32_THERM_CONTROL, l, h);
-
-	set_cpus_allowed(current, cpus_allowed);
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
 
 	if (l & 0x10) {
 		l = l >> 1;
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/cpu/cpufreq/powernow-k8.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/cpu/cpufreq/powernow-k8.c	2007-08-24 19:28:29.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/cpu/cpufreq/powernow-k8.c	2007-08-28 17:35:30.000000000 +0400
@@ -84,17 +84,17 @@ static u32 find_khz_freq_from_fiddid(u32
 	return 1000 * find_freq_from_fiddid(fid, did);
 }
 
-static u32 find_fid_from_pstate(u32 pstate)
+static u32 find_fid_from_pstate(unsigned int cpu, u32 pstate)
 {
 	u32 hi, lo;
-	rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
+	rdmsr_on_cpu(cpu, MSR_PSTATE_DEF_BASE + pstate, &lo, &hi);
 	return lo & HW_PSTATE_FID_MASK;
 }
 
-static u32 find_did_from_pstate(u32 pstate)
+static u32 find_did_from_pstate(unsigned int cpu, u32 pstate)
 {
 	u32 hi, lo;
-	rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
+	rdmsr_on_cpu(cpu, MSR_PSTATE_DEF_BASE + pstate, &lo, &hi);
 	return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
 }
 
@@ -116,14 +116,14 @@ static u32 convert_fid_to_vco_fid(u32 fi
  * Return 1 if the pending bit is set. Unless we just instructed the processor
  * to transition to a new state, seeing this bit set is really bad news.
  */
-static int pending_bit_stuck(void)
+static int pending_bit_stuck(unsigned int cpu)
 {
 	u32 lo, hi;
 
 	if (cpu_family == CPU_HW_PSTATE)
 		return 0;
 
-	rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	rdmsr_on_cpu(cpu, MSR_FIDVID_STATUS, &lo, &hi);
 	return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
 }
 
@@ -133,13 +133,14 @@ static int pending_bit_stuck(void)
  */
 static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
 {
+	unsigned int cpu = data->cpu;
 	u32 lo, hi;
 	u32 i = 0;
 
 	if (cpu_family == CPU_HW_PSTATE) {
-		rdmsr(MSR_PSTATE_STATUS, lo, hi);
+		rdmsr_on_cpu(cpu, MSR_PSTATE_STATUS, &lo, &hi);
 		i = lo & HW_PSTATE_MASK;
-		rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi);
+		rdmsr_on_cpu(cpu, MSR_PSTATE_DEF_BASE + i, &lo, &hi);
 		data->currfid = lo & HW_PSTATE_FID_MASK;
 		data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
 		return 0;
@@ -149,7 +150,7 @@ static int query_current_values_with_pen
 			dprintk("detected change pending stuck\n");
 			return 1;
 		}
-		rdmsr(MSR_FIDVID_STATUS, lo, hi);
+		rdmsr_on_cpu(cpu, MSR_FIDVID_STATUS, &lo, &hi);
 	} while (lo & MSR_S_LO_CHANGE_PENDING);
 
 	data->currvid = hi & MSR_S_HI_CURRENT_VID;
@@ -173,18 +174,18 @@ static void count_off_vst(struct powerno
 }
 
 /* need to init the control msr to a safe value (for each cpu) */
-static void fidvid_msr_init(void)
+static void fidvid_msr_init(unsigned int cpu)
 {
 	u32 lo, hi;
 	u8 fid, vid;
 
-	rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	rdmsr_on_cpu(cpu, MSR_FIDVID_STATUS, &lo, &hi);
 	vid = hi & MSR_S_HI_CURRENT_VID;
 	fid = lo & MSR_S_LO_CURRENT_FID;
 	lo = fid | (vid << MSR_C_LO_VID_SHIFT);
 	hi = MSR_C_HI_STP_GNT_BENIGN;
-	dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
-	wrmsr(MSR_FIDVID_CTL, lo, hi);
+	dprintk("cpu%u, init lo 0x%x, hi 0x%x\n", cpu, lo, hi);
+	wrmsr_on_cpu(cpu, MSR_FIDVID_CTL, lo, hi);
 }
 
 
@@ -291,8 +292,8 @@ static int decrease_vid_code_by_step(str
 /* Change hardware pstate by single MSR write */
 static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
 {
-	wrmsr(MSR_PSTATE_CTRL, pstate, 0);
-	data->currfid = find_fid_from_pstate(pstate);
+	wrmsr_on_cpu(data->cpu, MSR_PSTATE_CTRL, pstate, 0);
+	data->currfid = find_fid_from_pstate(data->cpu, pstate);
 	return 0;
 }
 
@@ -335,7 +336,7 @@ static int core_voltage_pre_transition(s
 		smp_processor_id(),
 		data->currfid, data->currvid, reqvid, data->rvo);
 
-	rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
+	rdmsr_on_cpu(data->cpu, MSR_FIDVID_STATUS, &lo, &maxvid);
 	maxvid = 0x1f & (maxvid >> 16);
 	dprintk("ph1 maxvid=0x%x\n", maxvid);
 	if (reqvid < maxvid) /* lower numbers are higher voltages */
@@ -499,22 +500,13 @@ static int core_voltage_post_transition(
 
 static int check_supported_cpu(unsigned int cpu)
 {
-	cpumask_t oldmask = CPU_MASK_ALL;
 	u32 eax, ebx, ecx, edx;
 	unsigned int rc = 0;
 
-	oldmask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-
-	if (smp_processor_id() != cpu) {
-		printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
-		goto out;
-	}
-
-	if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
+	if (cpu_data[cpu].x86_vendor != X86_VENDOR_AMD)
 		goto out;
 
-	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+	eax = cpuid_eax_on_cpu(cpu, CPUID_PROCESSOR_SIGNATURE);
 	if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
 	    ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
 		goto out;
@@ -526,20 +518,20 @@ static int check_supported_cpu(unsigned 
 			goto out;
 		}
 
-		eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
+		eax = cpuid_eax_on_cpu(cpu, CPUID_GET_MAX_CAPABILITIES);
 		if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
 			printk(KERN_INFO PFX
 			       "No frequency change capabilities detected\n");
 			goto out;
 		}
 
-		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+		cpuid_on_cpu(cpu, CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
 		if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
 			printk(KERN_INFO PFX "Power state transitions not supported\n");
 			goto out;
 		}
 	} else { /* must be a HW Pstate capable processor */
-		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+		cpuid_on_cpu(cpu, CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
 		if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
 			cpu_family = CPU_HW_PSTATE;
 		else
@@ -549,7 +541,6 @@ static int check_supported_cpu(unsigned 
 	rc = 1;
 
 out:
-	set_cpus_allowed(current, oldmask);
 	return rc;
 }
 
@@ -849,7 +840,7 @@ static int fill_powernow_table_pstate(st
 			printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
 			printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
 		}
-		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
+		rdmsr_on_cpu(data->cpu, MSR_PSTATE_DEF_BASE + index, &lo, &hi);
 		if (!(hi & HW_PSTATE_VALID_MASK)) {
 			dprintk("invalid pstate %d, ignoring\n", index);
 			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
@@ -1035,8 +1026,8 @@ static int transition_frequency_pstate(s
 	}
 
 	res = transition_pstate(data, pstate);
-	data->currfid = find_fid_from_pstate(pstate);
-	data->currdid = find_did_from_pstate(pstate);
+	data->currfid = find_fid_from_pstate(data->cpu, pstate);
+	data->currdid = find_did_from_pstate(data->cpu, pstate);
 	freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid);
 
 	for_each_cpu_mask(i, *(data->available_cores)) {
@@ -1049,7 +1040,6 @@ static int transition_frequency_pstate(s
 /* Driver entry point to switch to the target frequency */
 static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
 {
-	cpumask_t oldmask = CPU_MASK_ALL;
 	struct powernow_k8_data *data = powernow_data[pol->cpu];
 	u32 checkfid;
 	u32 checkvid;
@@ -1062,16 +1052,7 @@ static int powernowk8_target(struct cpuf
 	checkfid = data->currfid;
 	checkvid = data->currvid;
 
-	/* only run on specific CPU from here on */
-	oldmask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
-
-	if (smp_processor_id() != pol->cpu) {
-		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
-		goto err_out;
-	}
-
-	if (pending_bit_stuck()) {
+	if (pending_bit_stuck(pol->cpu)) {
 		printk(KERN_ERR PFX "failing targ, change pending bit set\n");
 		goto err_out;
 	}
@@ -1122,7 +1103,6 @@ static int powernowk8_target(struct cpuf
 	ret = 0;
 
 err_out:
-	set_cpus_allowed(current, oldmask);
 	return ret;
 }
 
@@ -1141,7 +1121,6 @@ static int powernowk8_verify(struct cpuf
 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
-	cpumask_t oldmask = CPU_MASK_ALL;
 	int rc;
 
 	if (!cpu_online(pol->cpu))
@@ -1180,16 +1159,7 @@ static int __cpuinit powernowk8_cpu_init
 		}
 	}
 
-	/* only run on specific CPU from here on */
-	oldmask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
-
-	if (smp_processor_id() != pol->cpu) {
-		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
-		goto err_out;
-	}
-
-	if (pending_bit_stuck()) {
+	if (pending_bit_stuck(pol->cpu)) {
 		printk(KERN_ERR PFX "failing init, change pending bit set\n");
 		goto err_out;
 	}
@@ -1198,10 +1168,7 @@ static int __cpuinit powernowk8_cpu_init
 		goto err_out;
 
 	if (cpu_family == CPU_OPTERON)
-		fidvid_msr_init();
-
-	/* run on any CPU again */
-	set_cpus_allowed(current, oldmask);
+		fidvid_msr_init(pol->cpu);
 
 	pol->governor = CPUFREQ_DEFAULT_GOVERNOR;
 	if (cpu_family == CPU_HW_PSTATE)
@@ -1244,7 +1211,6 @@ static int __cpuinit powernowk8_cpu_init
 	return 0;
 
 err_out:
-	set_cpus_allowed(current, oldmask);
 	powernow_k8_cpu_exit_acpi(data);
 
 	kfree(data);
@@ -1271,7 +1237,6 @@ static int __devexit powernowk8_cpu_exit
 static unsigned int powernowk8_get (unsigned int cpu)
 {
 	struct powernow_k8_data *data;
-	cpumask_t oldmask = current->cpus_allowed;
 	unsigned int khz = 0;
 
 	data = powernow_data[first_cpu(cpu_core_map[cpu])];
@@ -1279,13 +1244,6 @@ static unsigned int powernowk8_get (unsi
 	if (!data)
 		return -EINVAL;
 
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	if (smp_processor_id() != cpu) {
-		printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu);
-		set_cpus_allowed(current, oldmask);
-		return 0;
-	}
-
 	if (query_current_values_with_pending_wait(data))
 		goto out;
 
@@ -1296,7 +1254,6 @@ static unsigned int powernowk8_get (unsi
 	
 
 out:
-	set_cpus_allowed(current, oldmask);
 	return khz;
 }
 
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c	2007-08-28 17:35:34.000000000 +0400
@@ -323,14 +323,8 @@ static unsigned int get_cur_freq(unsigne
 {
 	unsigned l, h;
 	unsigned clock_freq;
-	cpumask_t saved_mask;
 
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	if (smp_processor_id() != cpu)
-		return 0;
-
-	rdmsr(MSR_IA32_PERF_STATUS, l, h);
+	rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h);
 	clock_freq = extract_clock(l, cpu, 0);
 
 	if (unlikely(clock_freq == 0)) {
@@ -340,11 +334,10 @@ static unsigned int get_cur_freq(unsigne
 		 * P-state transition (like TM2). Get the last freq set 
 		 * in PERF_CTL.
 		 */
-		rdmsr(MSR_IA32_PERF_CTL, l, h);
+		rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h);
 		clock_freq = extract_clock(l, cpu, 1);
 	}
 
-	set_cpus_allowed(current, saved_mask);
 	return clock_freq;
 }
 
@@ -514,15 +507,15 @@ static int centrino_cpu_init(struct cpuf
 
 	/* Check to see if Enhanced SpeedStep is enabled, and try to
 	   enable it if not. */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	rdmsr_on_cpu(policy->cpu, MSR_IA32_MISC_ENABLE, &l, &h);
 
 	if (!(l & (1<<16))) {
 		l |= (1<<16);
 		dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
-		wrmsr(MSR_IA32_MISC_ENABLE, l, h);
+		wrmsr_on_cpu(policy->cpu, MSR_IA32_MISC_ENABLE, l, h);
 
 		/* check to see if it stuck */
-		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+		rdmsr_on_cpu(policy->cpu, MSR_IA32_MISC_ENABLE, &l, &h);
 		if (!(l & (1<<16))) {
 			printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n");
 			return -ENODEV;
@@ -596,7 +589,6 @@ static int centrino_target (struct cpufr
 	unsigned int    newstate = 0;
 	unsigned int	msr, oldmsr, h, cpu = policy->cpu;
 	struct cpufreq_freqs	freqs;
-	cpumask_t		saved_mask;
 	int			retval;
 
 	if (centrino_model[cpu] == NULL)
@@ -606,8 +598,6 @@ static int centrino_target (struct cpufr
 	 * Support for SMP systems.
 	 * Make sure we are running on the CPU that wants to change frequency
 	 */
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed(current, policy->cpus);
 	if (!cpu_isset(smp_processor_id(), policy->cpus)) {
 		dprintk("couldn't limit to CPUs in this domain\n");
 		return(-EAGAIN);
@@ -620,7 +610,7 @@ static int centrino_target (struct cpufr
 	}
 
 	msr = centrino_model[cpu]->op_points[newstate].index;
-	rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+	rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
 
 	if (msr == (oldmsr & 0xffff)) {
 		retval = 0;
@@ -643,13 +633,12 @@ static int centrino_target (struct cpufr
 	msr &= 0xffff;
 	oldmsr |= msr;
 
-	wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+	wrmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, oldmsr, h);
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 
 	retval = 0;
 migrate_end:
-	set_cpus_allowed(current, saved_mask);
 	return (retval);
 }
 
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/cpu/mtrr/if.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/cpu/mtrr/if.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/cpu/mtrr/if.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/cpu/mtrr/if.c	2007-08-28 17:35:31.000000000 +0400
@@ -392,7 +392,7 @@ static int __init mtrr_if_init(void)
 		return -ENODEV;
 
 	proc_root_mtrr =
-	    create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
+	    create_proc_entry("mtrr", S_IWUSR | S_IRUGO, NULL);
 	if (proc_root_mtrr) {
 		proc_root_mtrr->owner = THIS_MODULE;
 		proc_root_mtrr->proc_fops = &mtrr_fops;
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/cpu/proc.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/cpu/proc.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/cpu/proc.c	2007-08-24 19:28:32.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/cpu/proc.c	2007-08-28 17:35:34.000000000 +0400
@@ -4,6 +4,7 @@
 #include <asm/semaphore.h>
 #include <linux/seq_file.h>
 #include <linux/cpufreq.h>
+#include <linux/vsched.h>
 
 /*
  *	Get CPU information for use by the procfs.
@@ -80,7 +81,7 @@ static int show_cpuinfo(struct seq_file 
 	int fpu_exception;
 
 #ifdef CONFIG_SMP
-	if (!cpu_online(n))
+	if (!vcpu_online(n))
 		return 0;
 #endif
 	seq_printf(m, "processor\t: %d\n"
@@ -100,9 +101,13 @@ static int show_cpuinfo(struct seq_file 
 		seq_printf(m, "stepping\t: unknown\n");
 
 	if ( cpu_has(c, X86_FEATURE_TSC) ) {
+#ifndef CONFIG_FAIRSCHED
 		unsigned int freq = cpufreq_quick_get(n);
 		if (!freq)
 			freq = cpu_khz;
+#else
+		unsigned int freq = ve_scale_khz(cpu_khz);
+#endif
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
 			freq / 1000, (freq % 1000));
 	}
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/entry.S linux-2.6.16.46-0.12-027test011/arch/i386/kernel/entry.S
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/entry.S	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/entry.S	2007-08-28 17:35:34.000000000 +0400
@@ -128,6 +128,7 @@ ENTRY(ret_from_fork)
 	call schedule_tail
 	GET_THREAD_INFO(%ebp)
 	popl %eax
+ret_from_fork_tail:
 	pushl $0x0202			# Reset kernel eflags
 	popfl
 	jmp syscall_exit
@@ -144,6 +145,21 @@ ENTRY(kdb_call)
 	jmp restore_all
 #endif
 
+ENTRY(i386_ret_from_resume)
+	pushl %eax			# saved around schedule_tail, as in ret_from_fork
+	call schedule_tail
+	GET_THREAD_INFO(%ebp)
+	popl %eax
+	movl (%esp),%eax		# fetch the word on top of the stack
+	testl %eax,%eax
+	jz    1f			# zero: skip the indirect call
+	pushl %esp			# pass the current stack pointer as the argument
+	call  *%eax			# indirect call through the fetched word
+	addl  $4,%esp			# drop the argument again
+1:
+	addl  $256,%esp			# discard 256 bytes of stack; NOTE(review): presumably a resume frame, confirm size
+	jmp   ret_from_fork_tail	# join the tail of ret_from_fork
+
 /*
  * Return to user mode is not as complex as all this looks,
  * but we want the default path for a system call return to
@@ -157,6 +173,7 @@ ret_from_exception:
 	preempt_stop
 ret_from_intr:
 	GET_THREAD_INFO(%ebp)
+check_userspace:	
 	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
 	movb CS(%esp), %al
 	testl $(VM_MASK | 3), %eax
@@ -333,7 +350,7 @@ work_notifysig:				# deal with pending s
 					# vm86-space
 	xorl %edx, %edx
 	call do_notify_resume
-	jmp resume_userspace
+	jmp check_userspace
 
 	ALIGN
 work_notifysig_v86:
@@ -344,7 +361,7 @@ work_notifysig_v86:
 	movl %eax, %esp
 	xorl %edx, %edx
 	call do_notify_resume
-	jmp resume_userspace
+	jmp check_userspace
 #endif
 
 	# perform syscall exit tracing
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/i386_ksyms.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/i386_ksyms.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/i386_ksyms.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/i386_ksyms.c	2007-08-28 17:35:34.000000000 +0400
@@ -4,6 +4,7 @@
 #include <linux/irq.h>
 #include <asm/checksum.h>
 #include <asm/desc.h>
+#include <asm/pgtable.h>
 
 EXPORT_SYMBOL(__down_failed);
 EXPORT_SYMBOL(__down_failed_interruptible);
@@ -21,6 +22,8 @@ EXPORT_SYMBOL(__put_user_2);
 EXPORT_SYMBOL(__put_user_4);
 EXPORT_SYMBOL(__put_user_8);
 
+EXPORT_SYMBOL(empty_zero_page);
+
 EXPORT_SYMBOL(strstr);
 
 #ifdef CONFIG_SMP
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/ldt.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/ldt.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/ldt.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/ldt.c	2007-08-28 17:35:33.000000000 +0400
@@ -13,6 +13,7 @@
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -20,6 +21,8 @@
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 
+#include <ub/ub_mem.h>
+
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)
 {
@@ -39,9 +42,9 @@ static int alloc_ldt(mm_context_t *pc, i
 	oldsize = pc->size;
 	mincount = (mincount+511)&(~511);
 	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+		newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE);
 	else
-		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+		newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
 
 	if (!newldt)
 		return -ENOMEM;
@@ -105,6 +108,7 @@ int init_new_context(struct task_struct 
 	}
 	return retval;
 }
+EXPORT_SYMBOL_GPL(init_new_context);
 
 /*
  * No need to lock the MM as we are the last user
@@ -251,3 +255,5 @@ asmlinkage int sys_modify_ldt(int func, 
 	}
 	return ret;
 }
+
+EXPORT_SYMBOL_GPL(default_ldt);
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/nmi.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/nmi.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/nmi.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/nmi.c	2007-08-28 17:35:30.000000000 +0400
@@ -524,7 +524,22 @@ void touch_nmi_watchdog (void)
 	touch_softlockup_watchdog();
 }
 
-void nmi_watchdog_tick (struct pt_regs * regs)
+void smp_show_regs(struct pt_regs *regs, void *info)
+{
+	static DEFINE_SPINLOCK(show_regs_lock);
+
+	if (regs == NULL)
+		return;
+	/* the lock keeps each caller's banner and register dump together */
+	bust_spinlocks(1);
+	spin_lock(&show_regs_lock);
+	printk("----------- IPI show regs -----------");
+	show_regs(regs);
+	spin_unlock(&show_regs_lock);
+	bust_spinlocks(0);
+}
+
+void nmi_watchdog_tick(struct pt_regs *regs)
 {
 
 	/*
@@ -539,10 +554,10 @@ void nmi_watchdog_tick (struct pt_regs *
 	if (last_irq_sums[cpu] == sum) {
 		/*
 		 * Ayiee, looks like this CPU is stuck ...
-		 * wait a few IRQs (5 seconds) before doing the oops ...
+		 * wait a few IRQs (30 seconds) before doing the oops ...
 		 */
 		alert_counter[cpu]++;
-		if (alert_counter[cpu] == 5*nmi_hz)
+		if (alert_counter[cpu] == 30*nmi_hz)
 			/*
 			 * die_nmi will return ONLY if NOTIFY_STOP happens..
 			 */
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/process.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/process.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/process.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/process.c	2007-08-28 17:35:34.000000000 +0400
@@ -38,6 +38,7 @@
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
 #include <linux/random.h>
+#include <linux/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -58,6 +59,9 @@
 #include <asm/cpu.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+EXPORT_SYMBOL_GPL(ret_from_fork);
+asmlinkage void i386_ret_from_resume(void) __asm__("i386_ret_from_resume");
+EXPORT_SYMBOL_GPL(i386_ret_from_resume);
 
 static int hlt_counter;
 
@@ -288,11 +292,15 @@ __setup("idle=", idle_setup);
 void show_regs(struct pt_regs * regs)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+	extern int die_counter;
 
 	printk("\n");
-	printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
-	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
-	print_symbol("EIP is at %s\n", regs->eip);
+	printk("Pid: %d, comm: %20s, oopses: %d\n",
+			current->pid, current->comm, die_counter);
+	printk("EIP: %04x:[<%08lx>] CPU: %d, VCPU: %d:%d\n",0xffff & regs->xcs,regs->eip, smp_processor_id(),
+			task_vsched_id(current), task_cpu(current));
+	if (decode_call_traces)
+		print_symbol("EIP is at %s\n", regs->eip);
 
 	if (user_mode(regs))
 		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
@@ -313,6 +321,8 @@ void show_regs(struct pt_regs * regs)
 	cr4 = read_cr4_safe();
 	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
 	show_trace(NULL, &regs->esp);
+	if (!decode_call_traces)
+		printk(" EIP: [<%08lx>]\n",regs->eip);
 }
 
 /*
@@ -330,6 +340,7 @@ __asm__(".section .text\n"
 	"pushl %eax\n\t"
 	"call do_exit\n"
 	".previous");
+EXPORT_SYMBOL(kernel_thread_helper);
 
 /*
  * Create a kernel thread
@@ -338,6 +349,13 @@ int kernel_thread(int (*fn)(void *), voi
 {
 	struct pt_regs regs;
 
+	/* Don't allow kernel_thread() inside VE */
+	if (!ve_allow_kthreads && !ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside VE\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 
 	regs.ebx = (unsigned long) fn;
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/ptrace.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/ptrace.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/ptrace.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/ptrace.c	2007-08-28 17:35:33.000000000 +0400
@@ -706,7 +706,9 @@ int do_syscall_trace(struct pt_regs *reg
 	/* the 0x80 provides a way for the tracing parent to distinguish
 	   between a syscall stop and SIGTRAP delivery */
 	/* Note that the debugger could change the result of test_thread_flag!*/
+	set_pn_state(current, entryexit ? PN_STOP_LEAVE : PN_STOP_ENTRY);
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
+	clear_pn_state(current);
 
 	/*
 	 * this isn't the same as continuing with a signal, but it will do
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/reboot_fixups.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/reboot_fixups.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/reboot_fixups.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/reboot_fixups.c	2007-08-28 17:35:30.000000000 +0400
@@ -11,6 +11,7 @@
 #include <asm/delay.h>
 #include <linux/pci.h>
 #include <linux/reboot_fixups.h>
+#include <linux/interrupt.h>
 
 static void cs5530a_warm_reset(struct pci_dev *dev)
 {
@@ -43,6 +44,11 @@ void mach_reboot_fixups(void)
 	struct pci_dev *dev;
 	int i;
 
+	/* we can be called from sysrq-B code. In such a case it is
+	 * prohibited to dig PCI */
+	if (in_interrupt())
+		return;
+
 	for (i=0; i < ARRAY_SIZE(fixups_table); i++) {
 		cur = &(fixups_table[i]);
 		dev = pci_get_device(cur->vendor, cur->device, NULL);
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/signal.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/signal.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/signal.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/signal.c	2007-08-28 17:35:29.000000000 +0400
@@ -582,7 +582,7 @@ static void fastcall do_signal(struct pt
 	if (!user_mode(regs))
 		return;
 
-	if (try_to_freeze())
+	if (try_to_freeze() && !signal_pending(current))
 		goto no_signal;
 
 	if (test_thread_flag(TIF_RESTORE_SIGMASK))
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/smp.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/smp.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/smp.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/smp.c	2007-08-28 17:35:33.000000000 +0400
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/dump.h>
 
+#include <asm/nmi.h>
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
 #include <mach_apic.h>
@@ -472,6 +473,8 @@ void flush_tlb_mm (struct mm_struct * mm
 	preempt_enable();
 }
 
+EXPORT_SYMBOL(flush_tlb_mm);
+
 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -620,6 +623,89 @@ int smp_call_function (void (*func) (voi
 }
 EXPORT_SYMBOL(smp_call_function);
 
+static DEFINE_SPINLOCK(nmi_call_lock);
+static struct nmi_call_data_struct {
+	smp_nmi_function func;	/* function to run on the other cpus */
+	void *info;		/* opaque second argument handed to func */
+	atomic_t started;	/* cpus that copied func/info and are about to run it */
+	atomic_t finished;	/* cpus done with func; only counted if wait is set */
+	cpumask_t cpus_called;	/* cpus that must not (or no longer) run func */
+	int wait;		/* initiator spins until finished reaches the cpu count */
+} *nmi_call_data;
+
+/* NMI-side half of smp_nmi_call_function(): runs the published func once per cpu */
+static int smp_nmi_callback(struct pt_regs * regs, int cpu)
+{
+	smp_nmi_function func;
+	void *info;
+	int wait;
+
+	func = nmi_call_data->func;
+	info = nmi_call_data->info;
+	wait = nmi_call_data->wait;
+	/* No ack_APIC_irq(): an NMI-mode IPI sets no in-service bit, so an
+	 * EOI could complete an unrelated interrupt that is still in service.
+	 */
+	/* prevent func() from being called multiple times on this cpu */
+	if (cpu_test_and_set(cpu, nmi_call_data->cpus_called))
+		return 0;
+	/* tell the initiator we hold a copy of the data and will run func */
+	mb();
+	atomic_inc(&nmi_call_data->started);
+	/* from here on nmi_call_data is only valid if wait was requested */
+	irq_enter();
+	func(regs, info);
+	irq_exit();
+	if (wait)
+		atomic_inc(&nmi_call_data->finished);
+
+	return 0;
+}
+
+/*
+ * Call func(regs, info) on every other online cpu via an NMI IPI; func must
+ * be fast and non-blocking.  May be called with disabled interrupts and from
+ * any context.  Returns -1 if another NMI call is in progress, 0 otherwise.
+ */
+int smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
+{
+	struct nmi_call_data_struct data;
+	int cpus;
+
+	cpus = num_online_cpus() - 1;
+	if (!cpus)
+		return 0;
+
+	data.func = func;
+	data.info = info;
+	data.wait = wait;
+	atomic_set(&data.started, 0);
+	atomic_set(&data.finished, 0);
+	cpus_clear(data.cpus_called);
+	/* prevent this cpu from calling func if NMI happens */
+	cpu_set(smp_processor_id(), data.cpus_called);
+
+	if (!spin_trylock(&nmi_call_lock))
+		return -1;
+
+	nmi_call_data = &data;
+	set_nmi_ipi_callback(smp_nmi_callback);
+	mb();
+
+	/* Send a message to all other CPUs and wait for them to respond */
+	send_IPI_allbutself(APIC_DM_NMI);
+	while (atomic_read(&data.started) != cpus)
+		cpu_relax();
+
+	unset_nmi_ipi_callback();
+	if (wait)
+		while (atomic_read(&data.finished) != cpus)
+			cpu_relax();
+	spin_unlock(&nmi_call_lock);
+
+	return 0;
+}
+
 void stop_this_cpu (void * dummy)
 {
 	/*
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/smpboot.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/smpboot.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/smpboot.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/smpboot.c	2007-08-28 17:35:31.000000000 +0400
@@ -323,6 +323,10 @@ static void __init synchronize_tsc_bp (v
 	}
 	if (!buggy)
 		printk("passed.\n");
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+#endif
 }
 
 static void __init synchronize_tsc_ap (void)
@@ -348,6 +352,10 @@ static void __init synchronize_tsc_ap (v
 		atomic_inc(&tsc_count_stop);
 		while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
 	}
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+#endif
 }
 #undef NR_LOOPS
 
@@ -940,6 +948,13 @@ static int __devinit do_boot_cpu(int api
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
 	idle->thread.eip = (unsigned long) start_secondary;
+
+#ifdef CONFIG_VE
+	/* Cosmetic: sleep_time won't be changed afterwards for the idle
+	* thread;  keep it 0 rather than -cycles. */
+	VE_TASK_INFO(idle)->sleep_time = 0;
+#endif
+
 	/* start_eip had better be page-aligned! */
 	start_eip = setup_trampoline();
 
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/sys_i386.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/sys_i386.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/sys_i386.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/sys_i386.c	2007-08-28 17:35:31.000000000 +0400
@@ -217,7 +217,7 @@ asmlinkage int sys_uname(struct old_utsn
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err=copy_to_user(name, &ve_utsname, sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
@@ -233,15 +233,15 @@ asmlinkage int sys_olduname(struct oldol
   
   	down_read(&uts_sem);
 	
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+	error = __copy_to_user(name->sysname,ve_utsname.sysname,__OLD_UTS_LEN);
 	error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->nodename,ve_utsname.nodename,__OLD_UTS_LEN);
 	error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->release,ve_utsname.release,__OLD_UTS_LEN);
 	error |= __put_user(0,name->release+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->version,ve_utsname.version,__OLD_UTS_LEN);
 	error |= __put_user(0,name->version+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+	error |= __copy_to_user(name->machine,ve_utsname.machine,__OLD_UTS_LEN);
 	error |= __put_user(0,name->machine+__OLD_UTS_LEN);
 	
 	up_read(&uts_sem);
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/syscall_table.S linux-2.6.16.46-0.12-027test011/arch/i386/kernel/syscall_table.S
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/syscall_table.S	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/syscall_table.S	2007-08-28 17:35:34.000000000 +0400
@@ -310,3 +310,24 @@ ENTRY(sys_call_table)
 	.long sys_pselect6
 	.long sys_ppoll
 	.long sys_unshare		/* 310 */
+	.rept 500-(.-sys_call_table)/4
+		.long sys_ni_syscall
+	.endr
+	.long sys_fairsched_mknod	/* 500 */
+	.long sys_fairsched_rmnod
+	.long sys_fairsched_chwt
+	.long sys_fairsched_mvpr
+	.long sys_fairsched_rate
+	.long sys_fairsched_vcpus
+	.rept 510-(.-sys_call_table)/4
+	.long sys_ni_syscall
+	.endr
+	.long sys_getluid		/* 510 */
+	.long sys_setluid
+	.long sys_setublimit
+	.long sys_ubstat
+	.rept 516-(.-sys_call_table)/4
+		.long sys_ni_syscall
+	.endr
+	.long sys_lchmod		/* 516 */
+	.long sys_lutime
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/timers/timer_tsc.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/timers/timer_tsc.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/timers/timer_tsc.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/timers/timer_tsc.c	2007-08-28 17:35:31.000000000 +0400
@@ -94,7 +94,7 @@ static int count2; /* counter for mark_o
  * Equal to 2^32 * (1 / (clocks per usec) ).
  * Initialized in time_init.
  */
-static unsigned long fast_gettimeoffset_quotient;
+unsigned long fast_gettimeoffset_quotient;
 
 static unsigned long get_offset_tsc(void)
 {
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/kernel/traps.c linux-2.6.16.46-0.12-027test011/arch/i386/kernel/traps.c
--- linux-2.6.16.46-0.12.orig/arch/i386/kernel/traps.c	2007-08-24 19:28:33.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/kernel/traps.c	2007-08-28 17:35:36.000000000 +0400
@@ -127,8 +127,10 @@ static void print_addr_and_symbol(unsign
 {
 	printk(log_lvl);
 	printk(" [<%08lx>] ", addr);
-	print_symbol("%s", addr);
-	printk("\n");
+	if (decode_call_traces) {
+		print_symbol("%s", addr);
+		printk("\n");
+	}
 }
 
 static inline unsigned long print_context_stack(struct thread_info *tinfo,
@@ -178,7 +180,10 @@ static void show_trace_log_lvl(struct ta
 		if (!stack)
 			break;
 		printk(log_lvl);
-		printk(" =======================\n");
+		if (decode_call_traces)
+			printk(" =======================\n");
+		else
+			printk(" =<ctx>= ");
 	}
 }
 
@@ -214,8 +219,13 @@ static void show_stack_log_lvl(struct ta
 	}
 	printk("\n");
 	printk(log_lvl);
-	printk("Call Trace:\n");
+	if (decode_call_traces)
+		printk("Call Trace:\n");
+	else
+		printk("Call Trace: ");
 	show_trace_log_lvl(task, esp, log_lvl);
+	if (!decode_call_traces)
+		printk("\n");
 }
 
 void show_stack(struct task_struct *task, unsigned long *esp)
@@ -223,6 +233,8 @@ void show_stack(struct task_struct *task
 	show_stack_log_lvl(task, esp, "");
 }
 
+EXPORT_SYMBOL(show_stack);
+
 /*
  * The architecture-independent dump_stack generator
  */
@@ -231,6 +243,8 @@ void dump_stack(void)
 	unsigned long stack;
 
 	show_trace(current, &stack);
+	if (!decode_call_traces)
+		printk("\n");
 }
 
 EXPORT_SYMBOL(dump_stack);
@@ -250,9 +264,10 @@ void show_registers(struct pt_regs *regs
 		ss = regs->xss & 0xffff;
 	}
 	print_modules();
-	printk(KERN_EMERG "CPU:    %d\nEIP:    %04x:[<%08lx>]    %s VLI\n"
+	printk(KERN_EMERG "CPU:    %d, VCPU: %d:%d\nEIP:    %04x:[<%08lx>]    %s VLI\n"
 			"EFLAGS: %08lx   (%s %.*s) \n",
-		smp_processor_id(), 0xffff & regs->xcs, regs->eip,
+		smp_processor_id(), task_vsched_id(current), task_cpu(current),
+		0xffff & regs->xcs, regs->eip,
 		print_tainted(), regs->eflags, system_utsname.release,
 		(int)strcspn(system_utsname.version, " "),
 		system_utsname.version);
@@ -263,8 +278,11 @@ void show_registers(struct pt_regs *regs
 		regs->esi, regs->edi, regs->ebp, esp);
 	printk(KERN_EMERG "ds: %04x   es: %04x   ss: %04x\n",
 		regs->xds & 0xffff, regs->xes & 0xffff, ss);
-	printk(KERN_EMERG "Process %s (pid: %d, threadinfo=%p task=%p)",
-		current->comm, current->pid, current_thread_info(), current);
+	printk(KERN_EMERG "Process %s (pid: %d, veid=%d, threadinfo=%p task=%p)",
+		current->comm, current->pid,
+		VEID(VE_TASK_INFO(current)->owner_env),
+		current_thread_info(), current);
+
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
@@ -310,9 +328,9 @@ static void handle_BUG(struct pt_regs *r
 		goto no_bug;
 	if (ud2 != 0x0b0f)
 		goto no_bug;
-	if (__get_user(line, (unsigned short __user *)(eip + 2)))
+	if (__get_user(line, (unsigned short __user *)(eip + 4)))
 		goto bug;
-	if (__get_user(file, (char * __user *)(eip + 4)) ||
+	if (__get_user(file, (char * __user *)(eip + 7)) ||
 		(unsigned long)file < PAGE_OFFSET || __get_user(c, file))
 		file = "<bad filename>";
 
@@ -327,6 +345,8 @@ bug:
 	printk(KERN_EMERG "Kernel BUG\n");
 }
 
+int die_counter = 0;
+
 /* This is gone through when something in the kernel
  * has done something bad and is about to be terminated.
 */
@@ -341,7 +361,6 @@ void die(const char * str, struct pt_reg
 		.lock_owner =		-1,
 		.lock_owner_depth =	0
 	};
-	static int die_counter;
 	unsigned long flags;
 
 	if (die.lock_owner != raw_smp_processor_id()) {
@@ -382,6 +401,7 @@ void die(const char * str, struct pt_reg
   	} else
 		printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
 
+	check_kernel_csum_bug();
 	bust_spinlocks(0);
 	die.lock_owner = -1;
 	spin_unlock_irqrestore(&die.lock, flags);
@@ -611,12 +631,27 @@ static void unknown_nmi_error(unsigned c
 	printk("Do you have a strange power saving mode enabled?\n");
 }
 
-static DEFINE_SPINLOCK(nmi_print_lock);
+/*
+ * Voyager doesn't implement these
+ */
+void __attribute__((weak)) smp_show_regs(struct pt_regs *regs, void *info)
+{
+}
+
+#ifdef CONFIG_SMP
+int __attribute__((weak))
+smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
+{
+	return 0;
+}
+#endif
 
 void die_nmi (struct pt_regs *regs, const char *msg)
 {
+	static DEFINE_SPINLOCK(nmi_print_lock);
+
 	if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
-	    NOTIFY_STOP)
+			NOTIFY_STOP)
 		return;
 
 	spin_lock(&nmi_print_lock);
@@ -632,6 +667,11 @@ void die_nmi (struct pt_regs *regs, cons
 #ifdef	CONFIG_KDB
 	kdb(KDB_REASON_NMI, 0, regs);
 #endif	/* CONFIG_KDB */
+	smp_nmi_call_function(smp_show_regs, NULL, 1);
+	bust_spinlocks(1);
+	/* the current CPU's messages should come last (at the bottom) */
+	if (!decode_call_traces)
+		smp_show_regs(regs, NULL);
 	printk(KERN_EMERG "console shuts up ...\n");
 	dump((char *)msg, regs);
 	console_silent();
@@ -649,6 +689,14 @@ void die_nmi (struct pt_regs *regs, cons
 	do_exit(SIGSEGV);
 }
 
+static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
+{
+	return 0;
+}
+
+static nmi_callback_t nmi_callback = dummy_nmi_callback;
+static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback;
+
 static void default_do_nmi(struct pt_regs * regs)
 {
 	unsigned char reason = 0;
@@ -681,6 +729,9 @@ static void default_do_nmi(struct pt_reg
 			return;
 		}
 #endif
+		if (nmi_ipi_callback != dummy_nmi_callback)
+			return;
+
 		unknown_nmi_error(reason, regs);
 		return;
 	}
@@ -697,13 +748,6 @@ static void default_do_nmi(struct pt_reg
 	reassert_nmi();
 }
 
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
-{
-	return 0;
-}
- 
-static nmi_callback_t nmi_callback = dummy_nmi_callback;
- 
 fastcall void do_nmi(struct pt_regs * regs, long error_code)
 {
 	int cpu;
@@ -717,9 +761,20 @@ fastcall void do_nmi(struct pt_regs * re
 	if (!rcu_dereference(nmi_callback)(regs, cpu))
 		default_do_nmi(regs);
 
+	nmi_ipi_callback(regs, cpu);
 	nmi_exit();
 }
 
+void set_nmi_ipi_callback(nmi_callback_t callback)
+{
+	nmi_ipi_callback = callback;
+}
+
+void unset_nmi_ipi_callback(void)
+{
+	nmi_ipi_callback = dummy_nmi_callback;
+}
+
 void set_nmi_callback(nmi_callback_t callback)
 {
 	rcu_assign_pointer(nmi_callback, callback);
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/lib/Makefile linux-2.6.16.46-0.12-027test011/arch/i386/lib/Makefile
--- linux-2.6.16.46-0.12.orig/arch/i386/lib/Makefile	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/lib/Makefile	2007-08-28 17:35:30.000000000 +0400
@@ -7,3 +7,5 @@ lib-y = checksum.o delay.o usercopy.o ge
 	bitops.o
 
 lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
+
+obj-$(CONFIG_SMP) += cpuid-on-cpu.o msr-on-cpu.o
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/lib/cpuid-on-cpu.c linux-2.6.16.46-0.12-027test011/arch/i386/lib/cpuid-on-cpu.c
--- linux-2.6.16.46-0.12.orig/arch/i386/lib/cpuid-on-cpu.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/lib/cpuid-on-cpu.c	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,91 @@
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <asm/processor.h>
+
+/* Request/result record handed to the remote CPU. */
+struct cpuid_info {
+	unsigned int cpu;
+	u32 op;
+	u32 eax, ebx, ecx, edx;
+};
+
+/*
+ * Cross-call handler: executes CPUID only when running on the CPU
+ * named in the request; every other CPU leaves the record untouched.
+ */
+static void __cpuid_on_cpu(void *info)
+{
+	struct cpuid_info *rv = info;
+
+	if (smp_processor_id() == rv->cpu)
+		cpuid(rv->op, &rv->eax, &rv->ebx, &rv->ecx, &rv->edx);
+}
+
+/*
+ * Run CPUID leaf @op on processor @cpu and return the four result
+ * registers through @eax, @ebx, @ecx and @edx.  If @cpu does not
+ * execute the request, all four results are reported as zero.
+ */
+void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+	preempt_disable();
+	if (smp_processor_id() == cpu)
+		cpuid(op, eax, ebx, ecx, edx);
+	else {
+		/* zeroed: an unanswered request must not return stack junk */
+		struct cpuid_info rv = { .cpu = cpu, .op = op };
+
+		smp_call_function(__cpuid_on_cpu, &rv, 0, 1);
+		*eax = rv.eax;
+		*ebx = rv.ebx;
+		*ecx = rv.ecx;
+		*edx = rv.edx;
+	}
+	preempt_enable();
+}
+
+/* Request/result record for the EAX-only variant. */
+struct cpuid_eax_info {
+	unsigned int cpu;
+	u32 op;
+	u32 eax;
+};
+
+/*
+ * Cross-call handler for cpuid_eax_on_cpu().  The argument is a
+ * struct cpuid_eax_info, so it must be accessed through that type.
+ */
+static void __cpuid_eax_on_cpu(void *info)
+{
+	struct cpuid_eax_info *rv = info;
+
+	if (smp_processor_id() == rv->cpu)
+		rv->eax = cpuid_eax(rv->op);
+}
+
+/*
+ * Run CPUID leaf @op on processor @cpu and return EAX.  Returns zero
+ * if @cpu does not execute the request.
+ */
+u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op)
+{
+	u32 ret;
+
+	preempt_disable();
+	if (smp_processor_id() == cpu)
+		ret = cpuid_eax(op);
+	else {
+		/* zeroed: an unanswered request must not return stack junk */
+		struct cpuid_eax_info rv = { .cpu = cpu, .op = op };
+
+		smp_call_function(__cpuid_eax_on_cpu, &rv, 0, 1);
+		ret = rv.eax;
+	}
+	preempt_enable();
+	return ret;
+}
+
+EXPORT_SYMBOL(cpuid_on_cpu);
+EXPORT_SYMBOL(cpuid_eax_on_cpu);
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/lib/msr-on-cpu.c linux-2.6.16.46-0.12-027test011/arch/i386/lib/msr-on-cpu.c
--- linux-2.6.16.46-0.12.orig/arch/i386/lib/msr-on-cpu.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/lib/msr-on-cpu.c	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,80 @@
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <asm/msr.h>
+
+/* Request/result record handed to the remote CPU. */
+struct msr_info {
+	unsigned int cpu;
+	u32 msr_no;
+	u32 l, h;
+};
+
+/*
+ * Cross-call handler: reads the MSR only when running on the CPU
+ * named in the request.
+ */
+static void __rdmsr_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+
+	if (smp_processor_id() == rv->cpu)
+		rdmsr(rv->msr_no, rv->l, rv->h);
+}
+
+/*
+ * Read MSR @msr_no on processor @cpu; the two 32-bit halves produced
+ * by rdmsr() are stored through @l and @h.  If @cpu does not execute
+ * the request, both halves are reported as zero.
+ */
+void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+	preempt_disable();
+	if (smp_processor_id() == cpu)
+		rdmsr(msr_no, *l, *h);
+	else {
+		/* zeroed: an unanswered request must not return stack junk */
+		struct msr_info rv = { .cpu = cpu, .msr_no = msr_no };
+
+		smp_call_function(__rdmsr_on_cpu, &rv, 0, 1);
+		*l = rv.l;
+		*h = rv.h;
+	}
+	preempt_enable();
+}
+
+/*
+ * Cross-call handler: writes the MSR only when running on the CPU
+ * named in the request.
+ */
+static void __wrmsr_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+
+	if (smp_processor_id() == rv->cpu)
+		wrmsr(rv->msr_no, rv->l, rv->h);
+}
+
+/*
+ * Write the value given by the halves @l and @h to MSR @msr_no on
+ * processor @cpu.
+ */
+void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	preempt_disable();
+	if (smp_processor_id() == cpu)
+		wrmsr(msr_no, l, h);
+	else {
+		struct msr_info rv;
+
+		rv.cpu = cpu;
+		rv.msr_no = msr_no;
+		rv.l = l;
+		rv.h = h;
+		smp_call_function(__wrmsr_on_cpu, &rv, 0, 1);
+	}
+	preempt_enable();
+}
+
+EXPORT_SYMBOL(rdmsr_on_cpu);
+EXPORT_SYMBOL(wrmsr_on_cpu);
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/mm/fault.c linux-2.6.16.46-0.12-027test011/arch/i386/mm/fault.c
--- linux-2.6.16.46-0.12.orig/arch/i386/mm/fault.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/mm/fault.c	2007-08-28 17:35:30.000000000 +0400
@@ -31,32 +31,6 @@
 extern void die(const char *,struct pt_regs *,long);
 
 /*
- * Unlock any spinlocks which will prevent us from getting the
- * message out 
- */
-void bust_spinlocks(int yes)
-{
-	int loglevel_save = console_loglevel;
-
-	if (yes) {
-		oops_in_progress = 1;
-		return;
-	}
-#ifdef CONFIG_VT
-	unblank_screen();
-#endif
-	oops_in_progress = 0;
-	/*
-	 * OK, the message is on the console.  Now we call printk()
-	 * without oops_in_progress set so that printk will give klogd
-	 * a poke.  Hold onto your hats...
-	 */
-	console_loglevel = 15;		/* NMI oopser may have shut the console up */
-	printk(" ");
-	console_loglevel = loglevel_save;
-}
-
-/*
  * Return EIP plus the CS segment base.  The segment limit is also
  * adjusted, clamped to the kernel/user address space (whichever is
  * appropriate), and returned in *eip_limit.
@@ -347,7 +321,6 @@ good_area:
 				goto bad_area;
 	}
 
- survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -485,14 +458,14 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (tsk->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
+	if (error_code & 4) {
+		/*
+		 * A 0-order allocation always succeeds unless something
+		 * really fatal has happened: beancounter overdraft or OOM.
+		 */
+		force_sig(SIGKILL, tsk);
+		return;
 	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
-		do_exit(SIGKILL);
 	goto no_context;
 
 do_sigbus:
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/mm/hugetlbpage.c linux-2.6.16.46-0.12-027test011/arch/i386/mm/hugetlbpage.c
--- linux-2.6.16.46-0.12.orig/arch/i386/mm/hugetlbpage.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/i386/mm/hugetlbpage.c	2007-08-28 17:35:33.000000000 +0400
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/sysctl.h>
+#include <linux/module.h>
 #include <asm/mman.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
@@ -110,6 +111,7 @@ int pmd_huge(pmd_t pmd)
 {
 	return !!(pmd_val(pmd) & _PAGE_PSE);
 }
+EXPORT_SYMBOL(pmd_huge);
 
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/mm/init.c linux-2.6.16.46-0.12-027test011/arch/i386/mm/init.c
--- linux-2.6.16.46-0.12.orig/arch/i386/mm/init.c	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/mm/init.c	2007-08-28 17:35:30.000000000 +0400
@@ -679,7 +679,7 @@ void __init pgtable_cache_init(void)
 		pmd_cache = kmem_cache_create("pmd",
 					PTRS_PER_PMD*sizeof(pmd_t),
 					PTRS_PER_PMD*sizeof(pmd_t),
-					0,
+					SLAB_UBC,
 					pmd_ctor,
 					NULL);
 		if (!pmd_cache)
@@ -688,7 +688,7 @@ void __init pgtable_cache_init(void)
 	pgd_cache = kmem_cache_create("pgd",
 				PTRS_PER_PGD*sizeof(pgd_t),
 				PTRS_PER_PGD*sizeof(pgd_t),
-				0,
+				SLAB_UBC,
 				pgd_ctor,
 				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
 	if (!pgd_cache)
diff -upr linux-2.6.16.46-0.12.orig/arch/i386/mm/pgtable.c linux-2.6.16.46-0.12-027test011/arch/i386/mm/pgtable.c
--- linux-2.6.16.46-0.12.orig/arch/i386/mm/pgtable.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/i386/mm/pgtable.c	2007-08-28 17:35:32.000000000 +0400
@@ -5,8 +5,10 @@
 #include <linux/config.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
 #include <linux/highmem.h>
@@ -65,6 +67,7 @@ void show_mem(void)
 	printk(KERN_INFO "%lu pages slab\n", ps.nr_slab);
 	printk(KERN_INFO "%lu pages pagetables\n", ps.nr_page_table_pages);
 }
+EXPORT_SYMBOL(show_mem);
 
 /*
  * Associate a virtual page frame with a given physical page frame 
@@ -159,9 +162,11 @@ struct page *pte_alloc_one(struct mm_str
 	struct page *pte;
 
 #ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_HIGHMEM|
+			__GFP_REPEAT|__GFP_ZERO, 0);
 #else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|
+			__GFP_REPEAT|__GFP_ZERO, 0);
 #endif
 	return pte;
 }
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/Kconfig linux-2.6.16.46-0.12-027test011/arch/ia64/Kconfig
--- linux-2.6.16.46-0.12.orig/arch/ia64/Kconfig	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/Kconfig	2007-08-28 17:35:34.000000000 +0400
@@ -299,6 +299,8 @@ config PREEMPT
           Say Y here if you are building a kernel for a desktop, embedded
           or real-time system.  Say N if you are unsure.
 
+source "kernel/Kconfig.fairsched"
+
 source "mm/Kconfig"
 
 config ARCH_SELECT_MEMORY_MODEL
@@ -505,6 +507,10 @@ endmenu
 
 source "arch/ia64/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
+
+source "kernel/ub/Kconfig"
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/ia32/binfmt_elf32.c linux-2.6.16.46-0.12-027test011/arch/ia64/ia32/binfmt_elf32.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/ia32/binfmt_elf32.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/ia32/binfmt_elf32.c	2007-08-28 17:35:30.000000000 +0400
@@ -18,6 +18,8 @@
 #include <asm/param.h>
 #include <asm/signal.h>
 
+#include <ub/ub_vmpages.h>
+
 #include "ia32priv.h"
 #include "elfcore32.h"
 
@@ -136,6 +138,12 @@ ia64_elf32_init (struct pt_regs *regs)
 		up_write(&current->mm->mmap_sem);
 	}
 
+	if (ub_memory_charge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES *
+					IA32_LDT_ENTRY_SIZE),
+				VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE,
+				NULL, UB_SOFT))
+		goto skip;
+
 	/*
 	 * Install LDT as anonymous memory.  This gives us all-zero segment descriptors
 	 * until a task modifies them via modify_ldt().
@@ -157,7 +165,12 @@ ia64_elf32_init (struct pt_regs *regs)
 			}
 		}
 		up_write(&current->mm->mmap_sem);
-	}
+	} else
+		ub_memory_uncharge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES *
+					IA32_LDT_ENTRY_SIZE),
+				VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE, NULL);
+
+skip:
 
 	ia64_psr(regs)->ac = 0;		/* turn off alignment checking */
 	regs->loadrs = 0;
@@ -212,9 +225,15 @@ ia32_setup_arg_pages (struct linux_binpr
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
+	ret = -ENOMEM;
+	if (ub_memory_charge(mm, IA32_STACK_TOP -
+				(PAGE_MASK & (unsigned long)bprm->p),
+				VM_STACK_FLAGS, NULL, UB_SOFT))
+		goto err_charge;
+
 	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 	if (!mpnt)
-		return -ENOMEM;
+		goto err_alloc;
 
 	memset(mpnt, 0, sizeof(*mpnt));
 
@@ -231,11 +250,8 @@ ia32_setup_arg_pages (struct linux_binpr
 			mpnt->vm_flags = VM_STACK_FLAGS;
 		mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
 					PAGE_COPY_EXEC: PAGE_COPY;
-		if ((ret = insert_vm_struct(current->mm, mpnt))) {
-			up_write(&current->mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, mpnt);
-			return ret;
-		}
+		if ((ret = insert_vm_struct(current->mm, mpnt)))
+			goto err_insert;
 		current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
 	}
 
@@ -254,6 +270,16 @@ ia32_setup_arg_pages (struct linux_binpr
 	current->thread.ppl = ia32_init_pp_list();
 
 	return 0;
+
+err_insert:
+	up_write(&current->mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, mpnt);
+err_alloc:
+	ub_memory_uncharge(mm, IA32_STACK_TOP -
+			(PAGE_MASK & (unsigned long)bprm->p),
+			VM_STACK_FLAGS, NULL);
+err_charge:
+	return ret;
 }
 
 static void
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/ia32/ia32_entry.S linux-2.6.16.46-0.12-027test011/arch/ia64/ia32/ia32_entry.S
--- linux-2.6.16.46-0.12.orig/arch/ia64/ia32/ia32_entry.S	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/ia32/ia32_entry.S	2007-08-28 17:35:33.000000000 +0400
@@ -52,43 +52,6 @@ ENTRY(ia32_clone)
 	br.ret.sptk.many rp
 END(ia32_clone)
 
-ENTRY(sys32_rt_sigsuspend)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,3,0		// preserve all eight input regs
-	mov loc0=rp
-	mov out0=in0				// mask
-	mov out1=in1				// sigsetsize
-	mov out2=sp				// out2 = &sigscratch
-	.fframe 16
-	adds sp=-16,sp				// allocate dummy "sigscratch"
-	;;
-	.body
-	br.call.sptk.many rp=ia32_rt_sigsuspend
-1:	.restore sp
-	adds sp=16,sp
-	mov rp=loc0
-	mov ar.pfs=loc1
-	br.ret.sptk.many rp
-END(sys32_rt_sigsuspend)
-
-ENTRY(sys32_sigsuspend)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,3,0		// preserve all eight input regs
-	mov loc0=rp
-	mov out0=in2				// mask (first two args are ignored)
-	;;
-	mov out1=sp				// out1 = &sigscratch
-	.fframe 16
-	adds sp=-16,sp				// allocate dummy "sigscratch"
-	.body
-	br.call.sptk.many rp=ia32_sigsuspend
-1:	.restore sp
-	adds sp=16,sp
-	mov rp=loc0
-	mov ar.pfs=loc1
-	br.ret.sptk.many rp
-END(sys32_sigsuspend)
-
 GLOBAL_ENTRY(ia32_ret_from_clone)
 	PT_REGS_UNWIND_INFO(0)
 {	/*
@@ -341,7 +304,7 @@ ia32_syscall_table:
 	data8 sys_ni_syscall	/* init_module */
 	data8 sys_ni_syscall	/* delete_module */
 	data8 sys_ni_syscall	/* get_kernel_syms */  /* 130 */
-	data8 sys_quotactl
+	data8 sys32_quotactl
 	data8 sys_getpgid
 	data8 sys_fchdir
 	data8 sys_ni_syscall	/* sys_bdflush */
@@ -389,7 +352,7 @@ ia32_syscall_table:
 	data8 sys_rt_sigpending
 	data8 compat_sys_rt_sigtimedwait
 	data8 sys32_rt_sigqueueinfo
-	data8 sys32_rt_sigsuspend
+	data8 compat_sys_rt_sigsuspend
 	data8 sys32_pread	  /* 180 */
 	data8 sys32_pwrite
 	data8 sys_chown	/* 16-bit version */
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/ia32/ia32_signal.c linux-2.6.16.46-0.12-027test011/arch/ia64/ia32/ia32_signal.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/ia32/ia32_signal.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/ia32/ia32_signal.c	2007-08-28 17:35:33.000000000 +0400
@@ -452,59 +452,20 @@ sigact_set_handler (struct k_sigaction *
 		sa->sa.sa_handler = (__sighandler_t) (((unsigned long) restorer << 32) | handler);
 }
 
-long
-__ia32_rt_sigsuspend (compat_sigset_t *sset, unsigned int sigsetsize, struct sigscratch *scr)
+asmlinkage long
+sys32_sigsuspend (int history0, int history1, old_sigset_t mask)
 {
-	extern long ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall);
-	sigset_t oldset, set;
-
-	scr->scratch_unat = 0;	/* avoid leaking kernel bits to user level */
-	memset(&set, 0, sizeof(set));
-
-	memcpy(&set.sig, &sset->sig, sigsetsize);
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-
+	mask &= _BLOCKABLE;
 	spin_lock_irq(&current->sighand->siglock);
-	{
-		oldset = current->blocked;
-		current->blocked = set;
-		recalc_sigpending();
-	}
+	current->saved_sigmask = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	/*
-	 * The return below usually returns to the signal handler.  We need to pre-set the
-	 * correct error code here to ensure that the right values get saved in sigcontext
-	 * by ia64_do_signal.
-	 */
-	scr->pt.r8 = -EINTR;
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (ia64_do_signal(&oldset, scr, 1))
-			return -EINTR;
-	}
-}
-
-asmlinkage long
-ia32_rt_sigsuspend (compat_sigset_t __user *uset, unsigned int sigsetsize, struct sigscratch *scr)
-{
-	compat_sigset_t set;
-
-	if (sigsetsize > sizeof(compat_sigset_t))
-		return -EINVAL;
-
-	if (copy_from_user(&set.sig, &uset->sig, sigsetsize))
-		return -EFAULT;
-
-	return __ia32_rt_sigsuspend(&set, sigsetsize, scr);
-}
-
-asmlinkage long
-ia32_sigsuspend (unsigned int mask, struct sigscratch *scr)
-{
-	return __ia32_rt_sigsuspend((compat_sigset_t *) &mask, sizeof(mask), scr);
+	current->state = TASK_INTERRUPTIBLE;
+	schedule();
+	set_thread_flag(TIF_RESTORE_SIGMASK);
+	return -ERESTARTNOHAND;
 }
 
 asmlinkage long
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/asm-offsets.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/asm-offsets.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/asm-offsets.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/asm-offsets.c	2007-08-28 17:35:31.000000000 +0400
@@ -44,11 +44,21 @@ void foo(void)
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
 	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
 	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
+#ifdef CONFIG_VE
+	DEFINE(IA64_TASK_PID_OFFSET, offsetof
+			(struct task_struct, pids[PIDTYPE_PID].vnr));
+#else
 	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
+#endif
 	DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
 	DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
 	DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
+#ifdef CONFIG_VE
+	DEFINE(IA64_TASK_TGID_OFFSET, offsetof
+			(struct task_struct, pids[PIDTYPE_TGID].vnr));
+#else
 	DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
+#endif
 	DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
 	DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
 
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/entry.S linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/entry.S
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/entry.S	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/entry.S	2007-08-28 17:35:34.000000000 +0400
@@ -505,6 +505,74 @@ GLOBAL_ENTRY(clone)
 	br.ret.sptk.many rp
 END(clone)
 
+GLOBAL_ENTRY(ia64_ret_from_resume)
+	PT_REGS_UNWIND_INFO(0)
+{	/*
+	 * Some versions of gas generate bad unwind info if the first instruction of a
+	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
+	 */
+	nop.m 0
+	nop.i 0
+	/*
+	 * We need to call schedule_tail() to complete the scheduling process.
+	 * Called by ia64_switch_to() after do_fork()->copy_thread().  r8 contains the
+	 * address of the previously executing task.
+	 */
+	br.call.sptk.many rp=ia64_invoke_schedule_tail
+}
+	br.call.sptk.many rp=ia64_invoke_resume
+	;;
+	adds sp=256,sp
+	;;
+	/* Return from interrupt, we are all right. */
+(pNonSys) br ia64_leave_kernel
+	;;
+	/* Tricky part follows. We must restore the correct syscall
+	 * register frame before doing the normal syscall exit job.
+	 * It would be most natural to keep sw->ar_pfs correct;
+	 * then we would arrive here with the correct register frame.
+	 * Unfortunately, IA64 has a feature. Registers were in backstore
+	 * after context switch, and the first br.ret does _NOT_ fetch
+	 * output registers.
+	 * It is quite natural: look, if the caller has output regs in its
+	 * frame, they should be consumed. If the callee does not have (enough
+	 * of) input/local registers (1 in this case), the situation is unusual.
+	 * Practical evidence: they are filled with random crap.
+	 * The only case where this is essential in the mainstream kernel
+	 * is sys_clone(). The result is that the new process gets some kernel
+	 * information in its register frame. Which is a security problem, btw.
+	 *
+	 * So, we set sw->ar_pfs to pretend the whole frame consists of local
+	 * regs. And we have to repartition the frame manually, using
+	 * information from pt->cr_ifs (the register is invalid in this
+	 * case, but it holds the correct pfm).
+	 */
+	adds r3=PT(CR_IFS)+16,sp
+	;;
+	ld8  r2=[r3],-(PT(CR_IFS)-PT(R8))
+	;;
+	extr.u  r2=r2,0,37
+	mov	r8=ar.ec
+	;;
+	extr.u  r8=r8,0,5
+	;;
+	shl	r8=r8,52
+	;;
+	or	r2=r2,r8
+	;;
+	mov  ar.pfs=r2
+	;;
+	movl r2=ia64_leave_syscall
+	;;
+	mov  rp=r2
+	/* Plus, we should fetch r8 and r10 from pt_regs. Something else? */
+	ld8  r8=[r3],PT(R10)-PT(R8)
+	;;
+	ld8  r10=[r3]
+	;;
+	br.ret.sptk.many rp
+END(ia64_ret_from_resume)
+
 	/*
 	 * Invoke a system call, but do some tracing before and after the call.
 	 * We MUST preserve the current register frame throughout this routine
@@ -1185,6 +1253,34 @@ GLOBAL_ENTRY(ia64_invoke_schedule_tail)
 	br.ret.sptk.many rp
 END(ia64_invoke_schedule_tail)
 
+GLOBAL_ENTRY(ia64_invoke_resume)
+	alloc loc1=ar.pfs,0,3,1,0
+	mov loc0=rp
+	adds out0=16,sp
+	;;
+	ld8  r8=[out0]
+	;;
+	cmp.eq p6,p0=r8,r0
+	;;
+(p6)	br.cond.sptk 1f
+	;;
+	mov  loc2=gp
+	;;
+	ld8  r10=[r8],8
+	;;
+	ld8  gp=[r8]
+	;;
+	mov  b7=r10
+	;;
+	br.call.sptk.many rp=b7
+	;;
+	mov  gp=loc2
+1:	
+	mov ar.pfs=loc1
+	mov rp=loc0
+	br.ret.sptk.many rp
+END(ia64_invoke_resume)
+
 	/*
 	 * Setup stack and call do_notify_resume_user().  Note that pSys and pNonSys need to
 	 * be set up by the caller.  We declare 8 input registers so the system call
@@ -1217,32 +1313,6 @@ ENTRY(notify_resume_user)
 	br.ret.sptk.many rp
 END(notify_resume_user)
 
-GLOBAL_ENTRY(sys_rt_sigsuspend)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
-	mov r9=ar.unat
-	mov loc0=rp				// save return address
-	mov out0=in0				// mask
-	mov out1=in1				// sigsetsize
-	adds out2=8,sp				// out2=&sigscratch->ar_pfs
-	;;
-	.fframe 16
-	.spillsp ar.unat, 16
-	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
-	st8 [out2]=loc1,-8			// save ar.pfs, out2=&sigscratch
-	.body
-	br.call.sptk.many rp=ia64_rt_sigsuspend
-.ret17:	.restore sp
-	adds sp=16,sp				// pop scratch stack space
-	;;
-	ld8 r9=[sp]				// load new unat from sw->caller_unat
-	mov rp=loc0
-	;;
-	mov ar.unat=r9
-	mov ar.pfs=loc1
-	br.ret.sptk.many rp
-END(sys_rt_sigsuspend)
-
 ENTRY(sys_rt_sigreturn)
 	PT_REGS_UNWIND_INFO(0)
 	/*
@@ -1616,8 +1686,23 @@ sys_call_table:
 	data8 sys_readlinkat
 	data8 sys_fchmodat
 	data8 sys_faccessat
-	data8 sys_ni_syscall			// reserved for pselect
-	data8 sys_ni_syscall			// 1295 reserved for ppoll
+	data8 sys_pselect6
+	data8 sys_ppoll
 	data8 sys_unshare
+.rept 1499-1297
+	data8 sys_ni_syscall
+.endr
+	data8 sys_fairsched_vcpus
+	data8 sys_fairsched_mknod		// 1500
+	data8 sys_fairsched_rmnod
+	data8 sys_fairsched_chwt
+	data8 sys_fairsched_mvpr
+	data8 sys_fairsched_rate
+	data8 sys_getluid			// 1505
+	data8 sys_setluid
+	data8 sys_setublimit
+	data8 sys_ubstat
+	data8 sys_lchmod
+	data8 sys_lutime			// 1510
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/fsys.S linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/fsys.S
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/fsys.S	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/fsys.S	2007-08-28 17:35:31.000000000 +0400
@@ -72,6 +72,7 @@ ENTRY(fsys_getpid)
 	FSYS_RETURN
 END(fsys_getpid)
 
+#ifndef CONFIG_VE
 ENTRY(fsys_getppid)
 	.prologue
 	.altrp b6
@@ -118,6 +119,7 @@ ENTRY(fsys_getppid)
 #endif
 	FSYS_RETURN
 END(fsys_getppid)
+#endif
 
 ENTRY(fsys_set_tid_address)
 	.prologue
@@ -665,7 +667,11 @@ fsyscall_table:
 	data8 0				// chown
 	data8 0				// lseek		// 1040
 	data8 fsys_getpid		// getpid
+#ifdef CONFIG_VE
+	data8 0
+#else
 	data8 fsys_getppid		// getppid
+#endif
 	data8 0				// mount
 	data8 0				// umount
 	data8 0				// setuid		// 1045
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/ia64_ksyms.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/ia64_ksyms.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/ia64_ksyms.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/ia64_ksyms.c	2007-08-28 17:35:33.000000000 +0400
@@ -74,6 +74,8 @@ EXPORT_SYMBOL(xor_ia64_4);
 EXPORT_SYMBOL(xor_ia64_5);
 #endif
 
+EXPORT_SYMBOL(empty_zero_page);
+
 #include <asm/pal.h>
 EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
 EXPORT_SYMBOL(ia64_pal_call_phys_static);
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/mca.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/mca.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/mca.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/mca.c	2007-08-28 17:35:31.000000000 +0400
@@ -1498,10 +1498,10 @@ default_monarch_init_process(struct noti
 	KDB_FLAG_CLEAR(NOIPI);
 #else	/* !CONFIG_KDB */
 	if (read_trylock(&tasklist_lock)) {
-		do_each_thread (g, t) {
+		do_each_thread_all (g, t) {
 			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
 			show_stack(t, NULL);
-		} while_each_thread (g, t);
+		} while_each_thread_all (g, t);
 		read_unlock(&tasklist_lock);
 	}
 #endif	/* CONFIG_KDB */
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/perfmon.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/perfmon.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/perfmon.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/perfmon.c	2007-08-28 17:35:31.000000000 +0400
@@ -2627,7 +2627,7 @@ pfm_get_task(pfm_context_t *ctx, pid_t p
 
 		read_lock(&tasklist_lock);
 
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		/* make sure task cannot go away while we operate on it */
 		if (p) get_task_struct(p);
@@ -4191,12 +4191,12 @@ pfm_check_task_exist(pfm_context_t *ctx)
 
 	read_lock(&tasklist_lock);
 
-	do_each_thread (g, t) {
+	do_each_thread_ve (g, t) {
 		if (t->thread.pfm_context == ctx) {
 			ret = 0;
 			break;
 		}
-	} while_each_thread (g, t);
+	} while_each_thread_ve (g, t);
 
 	read_unlock(&tasklist_lock);
 
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/process.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/process.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/process.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/process.c	2007-08-28 17:35:36.000000000 +0400
@@ -30,6 +30,7 @@
 #include <linux/efi.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
+#include <linux/sysctl.h>
 
 #include <asm/cpu.h>
 #include <asm/delay.h>
@@ -94,6 +95,8 @@ show_stack (struct task_struct *task, un
 	}
 }
 
+EXPORT_SYMBOL(show_stack);
+
 void
 dump_stack (void)
 {
@@ -108,7 +111,8 @@ show_regs (struct pt_regs *regs)
 	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
 
 	print_modules();
-	printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
+	printk("\nPid: %d, CPU %d, VCPU %d:%d, comm: %20s\n", current->pid, smp_processor_id(),
+			task_vsched_id(current), task_cpu(current), current->comm);
 	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s\n",
 	       regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
 	print_symbol("ip is at %s\n", ip);
@@ -158,7 +162,7 @@ show_regs (struct pt_regs *regs)
 }
 
 void
-do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
+do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall)
 {
 	if (fsys_mode(current, &scr->pt)) {
 		/* defer signal-handling etc. until we return to privilege-level 0.  */
@@ -173,8 +177,8 @@ do_notify_resume_user (sigset_t *oldset,
 #endif
 
 	/* deal with pending signal delivery */
-	if (test_thread_flag(TIF_SIGPENDING))
-		ia64_do_signal(oldset, scr, in_syscall);
+	if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK))
+		ia64_do_signal(scr, in_syscall);
 }
 
 static int pal_halt        = 1;
@@ -357,6 +361,9 @@ ia64_load_extra (struct task_struct *tas
 #endif
 }
 
+extern char ia64_ret_from_resume;
+EXPORT_SYMBOL(ia64_ret_from_resume);
+
 /*
  * Copy the state of an ia-64 thread.
  *
@@ -430,7 +437,6 @@ copy_thread (int nr, unsigned long clone
 			child_ptregs->r12 = user_stack_base + user_stack_size - 16;
 			child_ptregs->ar_bspstore = user_stack_base;
 			child_ptregs->ar_rnat = 0;
-			child_ptregs->loadrs = 0;
 		}
 	} else {
 		/*
@@ -670,16 +676,26 @@ out:
 	return error;
 }
 
+extern void start_kernel_thread (void);
+EXPORT_SYMBOL(start_kernel_thread);
+EXPORT_SYMBOL(execve);
+
 pid_t
 kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
 {
-	extern void start_kernel_thread (void);
 	unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
 	struct {
 		struct switch_stack sw;
 		struct pt_regs pt;
 	} regs;
 
+	/* Don't allow kernel_thread() inside VE */
+	if (!ve_allow_kthreads && !ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside VE\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 	regs.pt.cr_iip = helper_fptr[0];	/* set entry point (IP) */
 	regs.pt.r1 = helper_fptr[1];		/* set GP */
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/ptrace.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/ptrace.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/ptrace.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/ptrace.c	2007-08-28 17:35:33.000000000 +0400
@@ -8,6 +8,7 @@
  */
 #include <linux/config.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
@@ -101,6 +102,8 @@ ia64_get_scratch_nat_bits (struct pt_reg
 
 #	undef GET_BITS
 }
+EXPORT_SYMBOL(ia64_get_scratch_nat_bits);
+EXPORT_SYMBOL(__ia64_save_fpu);
 
 /*
  * Set the NaT bits for the scratch registers according to NAT and
@@ -457,6 +460,7 @@ ia64_peek (struct task_struct *child, st
 	*val = ret;
 	return 0;
 }
+EXPORT_SYMBOL(ia64_peek);
 
 long
 ia64_poke (struct task_struct *child, struct switch_stack *child_stack,
@@ -521,6 +525,7 @@ ia64_get_user_rbs_end (struct task_struc
 		*cfmp = cfm;
 	return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty);
 }
+EXPORT_SYMBOL(ia64_get_user_rbs_end);
 
 /*
  * Synchronize (i.e, write) the RSE backing store living in kernel
@@ -758,20 +763,20 @@ access_nat_bits (struct task_struct *chi
 	if (write_access) {
 		nat_bits = *data;
 		scratch_unat = ia64_put_scratch_nat_bits(pt, nat_bits);
-		if (unw_set_ar(info, UNW_AR_UNAT, scratch_unat) < 0) {
-			dprintk("ptrace: failed to set ar.unat\n");
-			return -1;
-		}
+		if (info->pri_unat_loc)
+			*info->pri_unat_loc = scratch_unat;
+		else
+			info->sw->caller_unat = scratch_unat;
 		for (regnum = 4; regnum <= 7; ++regnum) {
 			unw_get_gr(info, regnum, &dummy, &nat);
 			unw_set_gr(info, regnum, dummy,
 				   (nat_bits >> regnum) & 1);
 		}
 	} else {
-		if (unw_get_ar(info, UNW_AR_UNAT, &scratch_unat) < 0) {
-			dprintk("ptrace: failed to read ar.unat\n");
-			return -1;
-		}
+		if (info->pri_unat_loc)
+			scratch_unat = *info->pri_unat_loc;
+		else
+			scratch_unat = info->sw->caller_unat;
 		nat_bits = ia64_get_scratch_nat_bits(pt, scratch_unat);
 		for (regnum = 4; regnum <= 7; ++regnum) {
 			unw_get_gr(info, regnum, &dummy, &nat);
@@ -1433,7 +1438,7 @@ sys_ptrace (long request, pid_t pid, uns
 	ret = -ESRCH;
 	read_lock(&tasklist_lock);
 	{
-		child = find_task_by_pid(pid);
+		child = find_task_by_pid_ve(pid);
 		if (child) {
 			if (peek_or_poke)
 				child = find_thread_for_addr(child, addr);
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/setup.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/setup.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/setup.c	2007-08-24 19:28:28.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/setup.c	2007-08-28 17:35:34.000000000 +0400
@@ -46,6 +46,7 @@
 #include <linux/cpufreq.h>
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
+#include <linux/vsched.h>
 
 #include <asm/ia32.h>
 #include <asm/machvec.h>
@@ -617,9 +618,13 @@ show_cpuinfo (struct seq_file *m, void *
 		sprintf(cp, " 0x%lx", mask);
 	}
 
+#ifndef CONFIG_FAIRSCHED
 	proc_freq = cpufreq_quick_get(cpunum);
 	if (!proc_freq)
 		proc_freq = c->proc_freq / 1000;
+#else
+	proc_freq = ve_scale_khz(c->proc_freq) / 1000;
+#endif
 
 	seq_printf(m,
 		   "processor  : %d\n"
@@ -658,7 +663,7 @@ static void *
 c_start (struct seq_file *m, loff_t *pos)
 {
 #ifdef CONFIG_SMP
-	while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
+	while (*pos < NR_CPUS && !vcpu_online(*pos))
 		++*pos;
 #endif
 	return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/sigframe.h linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/sigframe.h
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/sigframe.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/sigframe.h	2007-08-28 17:35:30.000000000 +0400
@@ -22,4 +22,4 @@ struct sigframe {
 	struct sigcontext sc;
 };
 
-extern long ia64_do_signal (sigset_t *, struct sigscratch *, long);
+extern void ia64_do_signal (struct sigscratch *, long);
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/signal.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/signal.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/signal.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/signal.c	2007-08-28 17:35:33.000000000 +0400
@@ -42,47 +42,6 @@
 # define GET_SIGSET(k,u)	__get_user((k)->sig[0], &(u)->sig[0])
 #endif
 
-long
-ia64_rt_sigsuspend (sigset_t __user *uset, size_t sigsetsize, struct sigscratch *scr)
-{
-	sigset_t oldset, set;
-
-	/* XXX: Don't preclude handling different sized sigset_t's.  */
-	if (sigsetsize != sizeof(sigset_t))
-		return -EINVAL;
-
-	if (!access_ok(VERIFY_READ, uset, sigsetsize))
-		return -EFAULT;
-
-	if (GET_SIGSET(&set, uset))
-		return -EFAULT;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-
-	spin_lock_irq(&current->sighand->siglock);
-	{
-		oldset = current->blocked;
-		current->blocked = set;
-		recalc_sigpending();
-	}
-	spin_unlock_irq(&current->sighand->siglock);
-
-	/*
-	 * The return below usually returns to the signal handler.  We need to
-	 * pre-set the correct error code here to ensure that the right values
-	 * get saved in sigcontext by ia64_do_signal.
-	 */
-	scr->pt.r8 = EINTR;
-	scr->pt.r10 = -1;
-
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (ia64_do_signal(&oldset, scr, 1))
-			return -EINTR;
-	}
-}
-
 asmlinkage long
 sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2,
 		 long arg3, long arg4, long arg5, long arg6, long arg7,
@@ -270,7 +229,7 @@ ia64_rt_sigreturn (struct sigscratch *sc
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = virt_pid(current);
 	si.si_uid = current->uid;
 	si.si_addr = sc;
 	force_sig_info(SIGSEGV, &si, current);
@@ -375,7 +334,7 @@ force_sigsegv_info (int sig, void __user
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = virt_pid(current);
 	si.si_uid = current->uid;
 	si.si_addr = addr;
 	force_sig_info(SIGSEGV, &si, current);
@@ -479,10 +438,11 @@ handle_signal (unsigned long sig, struct
  * Note that `init' is a special process: it doesn't get signals it doesn't want to
  * handle.  Thus you cannot kill init even with a SIGKILL even by mistake.
  */
-long
-ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
+void
+ia64_do_signal (struct sigscratch *scr, long in_syscall)
 {
 	struct k_sigaction ka;
+	sigset_t *oldset;
 	siginfo_t info;
 	long restart = in_syscall;
 	long errno = scr->pt.r8;
@@ -494,9 +454,17 @@ ia64_do_signal (sigset_t *oldset, struct
 	 * doing anything if so.
 	 */
 	if (!user_mode(&scr->pt))
-		return 0;
+		return;
 
-	if (!oldset)
+	if (try_to_freeze() && !signal_pending(current)) {
+		if ((long) scr->pt.r10 != -1)
+			restart = 0;
+ 		goto no_signal;
+	}
+
+	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+		oldset = &current->saved_sigmask;
+	else
 		oldset = &current->blocked;
 
 	/*
@@ -549,8 +517,10 @@ ia64_do_signal (sigset_t *oldset, struct
 				if (IS_IA32_PROCESS(&scr->pt)) {
 					scr->pt.r8 = scr->pt.r1;
 					scr->pt.cr_iip -= 2;
-				} else
+				} else {
 					ia64_decrement_ip(&scr->pt);
+					scr->pt.r10 = 0;
+				}
 				restart = 0; /* don't restart twice if handle_signal() fails... */
 			}
 		}
@@ -559,11 +529,19 @@ ia64_do_signal (sigset_t *oldset, struct
 		 * Whee!  Actually deliver the signal.  If the delivery failed, we need to
 		 * continue to iterate in this loop so we can deliver the SIGSEGV...
 		 */
-		if (handle_signal(signr, &ka, &info, oldset, scr))
-			return 1;
+		if (handle_signal(signr, &ka, &info, oldset, scr)) {
+			/* a signal was successfully delivered; the saved
+			 * sigmask will have been stored in the signal frame,
+			 * and will be restored by sigreturn, so we can simply
+			 * clear the TIF_RESTORE_SIGMASK flag */
+			if (test_thread_flag(TIF_RESTORE_SIGMASK))
+				clear_thread_flag(TIF_RESTORE_SIGMASK);
+			return;
+		}
 	}
 
 	/* Did we come from a system call? */
+no_signal:
 	if (restart) {
 		/* Restart the system call - no handlers present */
 		if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR
@@ -583,10 +561,17 @@ ia64_do_signal (sigset_t *oldset, struct
 				ia64_decrement_ip(&scr->pt);
 				if (errno == ERESTART_RESTARTBLOCK)
 					scr->pt.r15 = __NR_restart_syscall;
+				scr->pt.r10 = 0;
 			}
 		}
 	}
-	return 0;
+
+	/* if there's no signal to deliver, we just put the saved sigmask
+	 * back */
+	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+		clear_thread_flag(TIF_RESTORE_SIGMASK);
+		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+	}
 }
 
 /* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it
@@ -641,7 +626,7 @@ set_sigdelayed(pid_t pid, int signo, int
 	for (i = 1; i <= 3; ++i) {
 		switch (i) {
 		case 1:
-			t = find_task_by_pid(pid);
+			t = find_task_by_pid_ve(pid);
 			if (t)
 				start_time = start_time_ul(t);
 			break;
@@ -682,7 +667,7 @@ do_sigdelayed(void)
 	siginfo.si_code = current_thread_info()->sigdelayed.code;
 	siginfo.si_addr = current_thread_info()->sigdelayed.addr;
 	pid = current_thread_info()->sigdelayed.pid;
-	t = find_task_by_pid(pid);
+	t = find_task_by_pid_ve(pid);
 	if (!t)
 		return;
 	if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/sys_ia64.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/sys_ia64.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/sys_ia64.c	2007-08-24 19:28:08.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/sys_ia64.c	2007-08-28 17:35:30.000000000 +0400
@@ -199,7 +199,7 @@ do_mmap2 (unsigned long addr, unsigned l
 
 	/* Careful about overflows.. */
 	len = PAGE_ALIGN(len);
-	if (!len || len > TASK_SIZE) {
+	if (len > TASK_SIZE) {
 		addr = -EINVAL;
 		goto out;
 	}
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/time.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/time.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/time.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/time.c	2007-08-28 17:35:30.000000000 +0400
@@ -33,6 +33,8 @@
 extern unsigned long wall_jiffies;
 
 #define TIME_KEEPER_ID	0	/* smp_processor_id() of time-keeper */
+unsigned int cpu_khz;	/* proc_freq / 1000, filled in by ia64_init_itm() */
+EXPORT_SYMBOL(cpu_khz);
 
 #ifdef CONFIG_IA64_DEBUG_IRQ
 
@@ -226,6 +228,8 @@ ia64_init_itm (void)
 		register_time_interpolator(&itc_interpolator);
 	}
 
+	cpu_khz = local_cpu_data->proc_freq / 1000;
+
 	/* Setup the CPU local timer tick */
 	ia64_cpu_local_tick();
 }
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/traps.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/traps.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/traps.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/traps.c	2007-08-28 17:35:36.000000000 +0400
@@ -58,34 +58,6 @@ trap_init (void)
 		fpswa_interface = __va(ia64_boot_param->fpswa);
 }
 
-/*
- * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock
- * is acquired through the console unblank code)
- */
-void
-bust_spinlocks (int yes)
-{
-	int loglevel_save = console_loglevel;
-
-	if (yes) {
-		oops_in_progress = 1;
-		return;
-	}
-
-#ifdef CONFIG_VT
-	unblank_screen();
-#endif
-	oops_in_progress = 0;
-	/*
-	 * OK, the message is on the console.  Now we call printk() without
-	 * oops_in_progress set so that printk will give klogd a poke.  Hold onto
-	 * your hats...
-	 */
-	console_loglevel = 15;		/* NMI oopser may have shut the console up */
-	printk(" ");
-	console_loglevel = loglevel_save;
-}
-
 void
 die (const char *str, struct pt_regs *regs, long err)
 {
@@ -122,6 +94,7 @@ die (const char *str, struct pt_regs *re
   	} else
 		printk(KERN_ERR "Recursive die() failure, output suppressed\n");
 
+	check_kernel_csum_bug();
 	bust_spinlocks(0);
 	die.lock_owner = -1;
 	spin_unlock_irq(&die.lock);
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/unaligned.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/unaligned.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/unaligned.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/unaligned.c	2007-08-28 17:35:29.000000000 +0400
@@ -1290,7 +1290,7 @@ within_logging_rate_limit (void)
 {
 	static unsigned long count, last_time;
 
-	if (jiffies - last_time > 5*HZ)
+	if (jiffies - last_time > 60 * HZ)
 		count = 0;
 	if (count < 5) {
 		last_time = jiffies;
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/unwind.c linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/unwind.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/unwind.c	2007-08-24 19:28:11.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/unwind.c	2007-08-28 17:35:30.000000000 +0400
@@ -1957,9 +1957,9 @@ EXPORT_SYMBOL(unw_unwind);
 int
 unw_unwind_to_user (struct unw_frame_info *info)
 {
-	unsigned long ip, sp, pr = 0;
+	unsigned long ip, sp, pr = info->pr;
 
-	while (unw_unwind(info) >= 0) {
+	do {
 		unw_get_sp(info, &sp);
 		if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
 		    < IA64_PT_REGS_SIZE) {
@@ -1977,7 +1977,7 @@ unw_unwind_to_user (struct unw_frame_inf
 				__FUNCTION__, ip);
 			return -1;
 		}
-	}
+	} while (unw_unwind(info) >= 0);
 	unw_get_ip(info, &ip);
 	UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
 		   __FUNCTION__, ip);
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/kernel/vmlinux.lds.S linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/vmlinux.lds.S
--- linux-2.6.16.46-0.12.orig/arch/ia64/kernel/vmlinux.lds.S	2007-08-24 19:28:11.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/kernel/vmlinux.lds.S	2007-08-28 17:35:30.000000000 +0400
@@ -161,6 +161,7 @@ SECTIONS
 	  *(.kdb_initcall.init)
 	  __kdb_initcall_end = .;
 	}
+  . = ALIGN(8);
    __con_initcall_start = .;
   .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
 	{ *(.con_initcall.init) }
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/mm/contig.c linux-2.6.16.46-0.12-027test011/arch/ia64/mm/contig.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/mm/contig.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/mm/contig.c	2007-08-28 17:35:31.000000000 +0400
@@ -64,6 +64,7 @@ show_mem (void)
 	printk("%ld pages in page table cache\n",
 		pgtable_quicklist_total_size());
 }
+EXPORT_SYMBOL(show_mem);
 
 /* physical address where the bootmem map is located */
 unsigned long bootmap_start;
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/mm/discontig.c linux-2.6.16.46-0.12-027test011/arch/ia64/mm/discontig.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/mm/discontig.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/mm/discontig.c	2007-08-28 17:35:31.000000000 +0400
@@ -656,6 +656,7 @@ void show_mem(void)
 		pgtable_quicklist_total_size());
 	printk("%d free buffer pages\n", nr_free_buffer_pages());
 }
+EXPORT_SYMBOL(show_mem);
 
 /**
  * call_pernode_memory - use SRAT to call callback functions with node info
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/mm/fault.c linux-2.6.16.46-0.12-027test011/arch/ia64/mm/fault.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/mm/fault.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/mm/fault.c	2007-08-28 17:35:30.000000000 +0400
@@ -163,7 +163,6 @@ ia64_do_page_fault (unsigned long addres
 	if ((vma->vm_flags & mask) != mask)
 		goto bad_area;
 
-  survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault, make
 	 * sure we exit gracefully rather than endlessly redo the
@@ -288,13 +287,13 @@ ia64_do_page_fault (unsigned long addres
 
   out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk(KERN_CRIT "VM: killing process %s\n", current->comm);
-	if (user_mode(regs))
-		do_exit(SIGKILL);
+	if (user_mode(regs)) {
+		/*
+		 * A 0-order allocation always succeeds unless something really
+		 * fatal has happened: a beancounter overdraft or OOM.
+		 */
+		force_sig(SIGKILL, current);
+		return;
+	}
 	goto no_context;
 }
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/mm/hugetlbpage.c linux-2.6.16.46-0.12-027test011/arch/ia64/mm/hugetlbpage.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/mm/hugetlbpage.c	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/mm/hugetlbpage.c	2007-08-28 17:35:36.000000000 +0400
@@ -103,6 +103,9 @@ int pmd_huge(pmd_t pmd)
 {
 	return 0;
 }
+
+EXPORT_SYMBOL(pmd_huge);
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
 {
diff -upr linux-2.6.16.46-0.12.orig/arch/ia64/mm/init.c linux-2.6.16.46-0.12-027test011/arch/ia64/mm/init.c
--- linux-2.6.16.46-0.12.orig/arch/ia64/mm/init.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/ia64/mm/init.c	2007-08-28 17:35:30.000000000 +0400
@@ -37,6 +37,8 @@
 #include <asm/unistd.h>
 #include <asm/mca.h>
 
+#include <ub/ub_vmpages.h>
+
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
@@ -96,7 +98,7 @@ check_pgt_cache(void)
 	preempt_disable();
 	while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
 		while (pages_to_free--) {
-			free_page((unsigned long)pgtable_quicklist_alloc());
+			free_page((unsigned long)pgtable_quicklist_alloc(0));
 		}
 		preempt_enable();
 		preempt_disable();
@@ -146,6 +148,10 @@ ia64_init_addr_space (void)
 
 	ia64_set_rbs_bot();
 
+	if (ub_memory_charge(current->mm, PAGE_SIZE, VM_DATA_DEFAULT_FLAGS,
+				NULL, UB_SOFT))
+		goto skip;
+
 	/*
 	 * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
 	 * the problem.  When the process attempts to write to the register backing store
@@ -163,11 +169,16 @@ ia64_init_addr_space (void)
 		if (insert_vm_struct(current->mm, vma)) {
 			up_write(&current->mm->mmap_sem);
 			kmem_cache_free(vm_area_cachep, vma);
+			ub_memory_uncharge(current->mm, PAGE_SIZE,
+					VM_DATA_DEFAULT_FLAGS, NULL);
 			return;
 		}
 		up_write(&current->mm->mmap_sem);
-	}
+	} else
+		ub_memory_uncharge(current->mm, PAGE_SIZE,
+				VM_DATA_DEFAULT_FLAGS, NULL);
 
+skip:
 	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
 		vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
diff -upr linux-2.6.16.46-0.12.orig/arch/mips/kernel/sysirix.c linux-2.6.16.46-0.12-027test011/arch/mips/kernel/sysirix.c
--- linux-2.6.16.46-0.12.orig/arch/mips/kernel/sysirix.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/mips/kernel/sysirix.c	2007-08-28 17:35:31.000000000 +0400
@@ -110,7 +110,8 @@ asmlinkage int irix_prctl(unsigned optio
 		printk("irix_prctl[%s:%d]: Wants PR_ISBLOCKED\n",
 		       current->comm, current->pid);
 		read_lock(&tasklist_lock);
-		task = find_task_by_pid(va_arg(args, pid_t));
+		task = find_task_by_pid_ve(va_arg(args, pid_t));
 		error = -ESRCH;
-		if (error)
+		/* keep -ESRCH when the lookup found nothing; never touch a NULL task */
+		if (task)
 			error = (task->run_list.next != NULL);
diff -upr linux-2.6.16.46-0.12.orig/arch/powerpc/Kconfig linux-2.6.16.46-0.12-027test011/arch/powerpc/Kconfig
--- linux-2.6.16.46-0.12.orig/arch/powerpc/Kconfig	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/powerpc/Kconfig	2007-08-28 17:35:34.000000000 +0400
@@ -536,6 +536,7 @@ config HIGHMEM
 	bool "High memory support"
 	depends on PPC32
 
+source "kernel/Kconfig.fairsched"
 source kernel/Kconfig.hz
 source kernel/Kconfig.preempt
 source "fs/Kconfig.binfmt"
@@ -980,6 +981,8 @@ source "arch/powerpc/platforms/iseries/K
 
 source "lib/Kconfig"
 
+source "kernel/ub/Kconfig"
+
 menu "Instrumentation Support"
         depends on EXPERIMENTAL
 
@@ -998,6 +1001,8 @@ endmenu
 
 source "arch/powerpc/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 config KEYS_COMPAT
diff -upr linux-2.6.16.46-0.12.orig/arch/powerpc/kernel/misc_32.S linux-2.6.16.46-0.12-027test011/arch/powerpc/kernel/misc_32.S
--- linux-2.6.16.46-0.12.orig/arch/powerpc/kernel/misc_32.S	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/powerpc/kernel/misc_32.S	2007-08-28 17:35:31.000000000 +0400
@@ -973,7 +973,7 @@ _GLOBAL(_get_SP)
  * Create a kernel thread
  *   kernel_thread(fn, arg, flags)
  */
-_GLOBAL(kernel_thread)
+_GLOBAL(ppc_kernel_thread)
 	stwu	r1,-16(r1)
 	stw	r30,8(r1)
 	stw	r31,12(r1)
diff -upr linux-2.6.16.46-0.12.orig/arch/powerpc/kernel/misc_64.S linux-2.6.16.46-0.12-027test011/arch/powerpc/kernel/misc_64.S
--- linux-2.6.16.46-0.12.orig/arch/powerpc/kernel/misc_64.S	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/powerpc/kernel/misc_64.S	2007-08-28 17:35:31.000000000 +0400
@@ -681,7 +681,7 @@ _GLOBAL(scom970_write)
  * Create a kernel thread
  *   kernel_thread(fn, arg, flags)
  */
-_GLOBAL(kernel_thread)
+_GLOBAL(ppc_kernel_thread)
 	std	r29,-24(r1)
 	std	r30,-16(r1)
 	stdu	r1,-STACK_FRAME_OVERHEAD(r1)
diff -upr linux-2.6.16.46-0.12.orig/arch/powerpc/kernel/process.c linux-2.6.16.46-0.12-027test011/arch/powerpc/kernel/process.c
--- linux-2.6.16.46-0.12.orig/arch/powerpc/kernel/process.c	2007-08-24 19:28:30.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/powerpc/kernel/process.c	2007-08-28 17:35:34.000000000 +0400
@@ -433,7 +433,7 @@ void show_regs(struct pt_regs * regs)
 	       current, current->pid, current->comm, task_thread_info(current));
 
 #ifdef CONFIG_SMP
-	printk(" CPU: %d", smp_processor_id());
+	printk(" CPU: %d, VCPU: %d:%d", smp_processor_id(), task_vsched_id(current), task_cpu(current));
 #endif /* CONFIG_SMP */
 
 	for (i = 0;  i < 32;  i++) {
@@ -827,7 +827,7 @@ static inline int valid_irq_stack(unsign
 				  unsigned long nbytes)
 {
 	unsigned long stack_page;
-	unsigned long cpu = task_cpu(p);
+	unsigned long cpu = task_pcpu(p);
 
 	/*
 	 * Avoid crashing if the stack has overflowed and corrupted
@@ -966,6 +966,25 @@ void dump_stack(void)
 }
 EXPORT_SYMBOL(dump_stack);
 
+/*
+ * C front end for kernel_thread(): the thread itself is created by
+ * ppc_kernel_thread().  The request is only passed on when
+ * ve_is_super() accepts the current execution environment; otherwise
+ * the call is logged with a stack dump and fails with -EPERM.
+ */
+long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+	extern long ppc_kernel_thread(int (*fn)(void *),
+				      void *arg, unsigned long flags);
+
+	if (ve_is_super(get_exec_env()))
+		return ppc_kernel_thread(fn, arg, flags);
+
+	/* Don't allow kernel_thread() inside VE */
+	printk("kernel_thread call inside VE\n");
+	dump_stack();
+	return -EPERM;
+}
+
 #ifdef CONFIG_PPC64
 void ppc64_runlatch_on(void)
 {
diff -upr linux-2.6.16.46-0.12.orig/arch/powerpc/kernel/syscalls.c linux-2.6.16.46-0.12-027test011/arch/powerpc/kernel/syscalls.c
--- linux-2.6.16.46-0.12.orig/arch/powerpc/kernel/syscalls.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/powerpc/kernel/syscalls.c	2007-08-28 17:35:31.000000000 +0400
@@ -259,7 +259,7 @@ long ppc_newuname(struct new_utsname __u
 	int err = 0;
 
 	down_read(&uts_sem);
-	if (copy_to_user(name, &system_utsname, sizeof(*name)))
+	if (copy_to_user(name, &ve_utsname, sizeof(*name)))
 		err = -EFAULT;
 	up_read(&uts_sem);
 	if (!err)
@@ -272,7 +272,7 @@ int sys_uname(struct old_utsname __user 
 	int err = 0;
 	
 	down_read(&uts_sem);
-	if (copy_to_user(name, &system_utsname, sizeof(*name)))
+	if (copy_to_user(name, &ve_utsname, sizeof(*name)))
 		err = -EFAULT;
 	up_read(&uts_sem);
 	if (!err)
@@ -288,19 +288,19 @@ int sys_olduname(struct oldold_utsname _
 		return -EFAULT;
   
 	down_read(&uts_sem);
-	error = __copy_to_user(&name->sysname, &system_utsname.sysname,
+	error = __copy_to_user(&name->sysname, &ve_utsname.sysname,
 			       __OLD_UTS_LEN);
 	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename, &system_utsname.nodename,
+	error |= __copy_to_user(&name->nodename, &ve_utsname.nodename,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release, &system_utsname.release,
+	error |= __copy_to_user(&name->release, &ve_utsname.release,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->release + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version, &system_utsname.version,
+	error |= __copy_to_user(&name->version, &ve_utsname.version,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->version + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine, &system_utsname.machine,
+	error |= __copy_to_user(&name->machine, &ve_utsname.machine,
 				__OLD_UTS_LEN);
 	error |= override_machine(name->machine);
 	up_read(&uts_sem);
diff -upr linux-2.6.16.46-0.12.orig/arch/powerpc/kernel/systbl.S linux-2.6.16.46-0.12-027test011/arch/powerpc/kernel/systbl.S
--- linux-2.6.16.46-0.12.orig/arch/powerpc/kernel/systbl.S	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/powerpc/kernel/systbl.S	2007-08-28 17:35:34.000000000 +0400
@@ -340,3 +340,22 @@ SYSCALL(fchmodat)
 SYSCALL(faccessat)
 SYSCALL(ni_syscall) /* COMPAT_SYS(get_robust_list) */
 SYSCALL(ni_syscall) /* COMPAT_SYS(set_robust_list) */
+
+.rept 400-283
+SYSCALL(ni_syscall)
+.endr
+
+SYSCALL(fairsched_mknod)
+SYSCALL(fairsched_rmnod)
+SYSCALL(fairsched_chwt)
+SYSCALL(fairsched_mvpr)
+SYSCALL(fairsched_rate)
+
+.rept 410-405
+SYSCALL(ni_syscall)
+.endr
+
+SYSCALL(getluid)
+SYSCALL(setluid)
+SYSCALL(setublimit)
+SYSCALL(ubstat)
diff -upr linux-2.6.16.46-0.12.orig/arch/powerpc/mm/fault.c linux-2.6.16.46-0.12-027test011/arch/powerpc/mm/fault.c
--- linux-2.6.16.46-0.12.orig/arch/powerpc/mm/fault.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/powerpc/mm/fault.c	2007-08-28 17:35:30.000000000 +0400
@@ -307,7 +307,6 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
- survive:
 	switch (handle_mm_fault(mm, vma, address, is_write)) {
 
 	case VM_FAULT_MINOR:
@@ -351,14 +350,12 @@ bad_area_nosemaphore:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk("VM: killing process %s\n", current->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		/*
+		 * A 0-order allocation always succeeds unless something really
+		 * fatal has happened: a beancounter overdraft or OOM. Den
+		 */
+		force_sig(SIGKILL, current);
 	return SIGKILL;
 
 do_sigbus:
diff -upr linux-2.6.16.46-0.12.orig/arch/powerpc/mm/init_64.c linux-2.6.16.46-0.12-027test011/arch/powerpc/mm/init_64.c
--- linux-2.6.16.46-0.12.orig/arch/powerpc/mm/init_64.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/powerpc/mm/init_64.c	2007-08-28 17:35:30.000000000 +0400
@@ -225,7 +225,8 @@ void pgtable_cache_init(void)
 		pgtable_cache[i] = kmem_cache_create(name,
 						     size, size,
 						     SLAB_HWCACHE_ALIGN |
-						     SLAB_MUST_HWCACHE_ALIGN,
+						     SLAB_MUST_HWCACHE_ALIGN |
+						     SLAB_UBC | SLAB_NO_CHARGE,
 						     zero_ctor,
 						     NULL);
 		if (! pgtable_cache[i])
diff -upr linux-2.6.16.46-0.12.orig/arch/powerpc/mm/mem.c linux-2.6.16.46-0.12-027test011/arch/powerpc/mm/mem.c
--- linux-2.6.16.46-0.12.orig/arch/powerpc/mm/mem.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/powerpc/mm/mem.c	2007-08-28 17:35:31.000000000 +0400
@@ -222,6 +222,7 @@ void show_mem(void)
 	printk("%ld pages shared\n", shared);
 	printk("%ld pages swap cached\n", cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 /*
  * Initialize the bootmem system and give it all the memory we
diff -upr linux-2.6.16.46-0.12.orig/arch/powerpc/mm/pgtable_32.c linux-2.6.16.46-0.12-027test011/arch/powerpc/mm/pgtable_32.c
--- linux-2.6.16.46-0.12.orig/arch/powerpc/mm/pgtable_32.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/powerpc/mm/pgtable_32.c	2007-08-28 17:35:30.000000000 +0400
@@ -85,7 +85,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *ret;
 
-	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC |
+			__GFP_ZERO, PGDIR_ORDER);
 	return ret;
 }
 
@@ -119,6 +120,7 @@ struct page *pte_alloc_one(struct mm_str
 #else
 	gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
 #endif
+	flags |= (__GFP_UBC | __GFP_SOFT_UBC);
 
 	ptepage = alloc_pages(flags, 0);
 	if (ptepage)
diff -upr linux-2.6.16.46-0.12.orig/arch/ppc/Kconfig linux-2.6.16.46-0.12-027test011/arch/ppc/Kconfig
--- linux-2.6.16.46-0.12.orig/arch/ppc/Kconfig	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ppc/Kconfig	2007-08-28 17:35:34.000000000 +0400
@@ -920,6 +920,7 @@ config NR_CPUS
 config HIGHMEM
 	bool "High memory support"
 
+source "kernel/Kconfig.fairsched"
 source kernel/Kconfig.hz
 source kernel/Kconfig.preempt
 source "mm/Kconfig"
@@ -1394,6 +1395,10 @@ source "arch/powerpc/oprofile/Kconfig"
 
 source "arch/ppc/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
+source "kernel/ub/Kconfig"
+
 source "crypto/Kconfig"
diff -upr linux-2.6.16.46-0.12.orig/arch/ppc/kernel/misc.S linux-2.6.16.46-0.12-027test011/arch/ppc/kernel/misc.S
--- linux-2.6.16.46-0.12.orig/arch/ppc/kernel/misc.S	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ppc/kernel/misc.S	2007-08-28 17:35:31.000000000 +0400
@@ -1004,7 +1004,7 @@ _GLOBAL(_get_SP)
  * Create a kernel thread
  *   kernel_thread(fn, arg, flags)
  */
-_GLOBAL(kernel_thread)
+_GLOBAL(ppc_kernel_thread)
 	stwu	r1,-16(r1)
 	stw	r30,8(r1)
 	stw	r31,12(r1)
diff -upr linux-2.6.16.46-0.12.orig/arch/ppc/mm/fault.c linux-2.6.16.46-0.12-027test011/arch/ppc/mm/fault.c
--- linux-2.6.16.46-0.12.orig/arch/ppc/mm/fault.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ppc/mm/fault.c	2007-08-28 17:35:30.000000000 +0400
@@ -247,7 +247,6 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
- survive:
         switch (handle_mm_fault(mm, vma, address, is_write)) {
         case VM_FAULT_MINOR:
                 current->min_flt++;
@@ -290,14 +289,12 @@ bad_area:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk("VM: killing process %s\n", current->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		/*
+		 * A 0-order allocation always succeeds unless something really
+		 * fatal has happened: a beancounter overdraft or OOM. Den
+		 */
+		force_sig(SIGKILL, current);
 	return SIGKILL;
 
 do_sigbus:
diff -upr linux-2.6.16.46-0.12.orig/arch/ppc/mm/init.c linux-2.6.16.46-0.12-027test011/arch/ppc/mm/init.c
--- linux-2.6.16.46-0.12.orig/arch/ppc/mm/init.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ppc/mm/init.c	2007-08-28 17:35:31.000000000 +0400
@@ -132,6 +132,7 @@ void show_mem(void)
 	printk("%d pages shared\n",shared);
 	printk("%d pages swap cached\n",cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 /* Free up now-unused memory */
 static void free_sec(unsigned long start, unsigned long end, const char *name)
diff -upr linux-2.6.16.46-0.12.orig/arch/ppc/mm/pgtable.c linux-2.6.16.46-0.12-027test011/arch/ppc/mm/pgtable.c
--- linux-2.6.16.46-0.12.orig/arch/ppc/mm/pgtable.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/ppc/mm/pgtable.c	2007-08-28 17:35:30.000000000 +0400
@@ -84,7 +84,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *ret;
 
-	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC |
+			__GFP_ZERO, PGDIR_ORDER);
 	return ret;
 }
 
@@ -118,6 +119,7 @@ struct page *pte_alloc_one(struct mm_str
 #else
 	gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
 #endif
+	flags |= (__GFP_UBC | __GFP_SOFT_UBC);
 
 	ptepage = alloc_pages(flags, 0);
 	if (ptepage)
diff -upr linux-2.6.16.46-0.12.orig/arch/s390/Kconfig linux-2.6.16.46-0.12-027test011/arch/s390/Kconfig
--- linux-2.6.16.46-0.12.orig/arch/s390/Kconfig	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/s390/Kconfig	2007-08-28 17:35:31.000000000 +0400
@@ -528,8 +528,12 @@ source "lib/Kconfig.statistic"
 
 source "arch/s390/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "kernel/ub/Kconfig"
diff -upr linux-2.6.16.46-0.12.orig/arch/s390/kernel/process.c linux-2.6.16.46-0.12-027test011/arch/s390/kernel/process.c
--- linux-2.6.16.46-0.12.orig/arch/s390/kernel/process.c	2007-08-24 19:28:22.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/s390/kernel/process.c	2007-08-28 17:35:31.000000000 +0400
@@ -164,9 +164,10 @@ void show_regs(struct pt_regs *regs)
 	struct task_struct *tsk = current;
 
         printk("CPU:    %d    %s\n", task_thread_info(tsk)->cpu, print_tainted());
-        printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
-	       current->comm, current->pid, (void *) tsk,
-	       (void *) tsk->thread.ksp);
+	printk("Process %s (pid: %d, veid: %d, task: %p, ksp: %p)\n",
+		current->comm, current->pid,
+		VEID(VE_TASK_INFO(current)->owner_env),
+		(void *) tsk, (void *) tsk->thread.ksp);
 
 	show_registers(regs);
 	/* Show stack backtrace if pt_regs is from kernel mode */
@@ -187,6 +188,13 @@ int kernel_thread(int (*fn)(void *), voi
 {
 	struct pt_regs regs;
 
+	if (!ve_is_super(get_exec_env())) {
+		/* Don't allow kernel_thread() inside VE */
+		printk("kernel_thread call inside VE\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	memset(&regs, 0, sizeof(regs));
 	regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT;
 	regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
diff -upr linux-2.6.16.46-0.12.orig/arch/s390/kernel/smp.c linux-2.6.16.46-0.12-027test011/arch/s390/kernel/smp.c
--- linux-2.6.16.46-0.12.orig/arch/s390/kernel/smp.c	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/s390/kernel/smp.c	2007-08-28 17:35:31.000000000 +0400
@@ -518,6 +518,17 @@ int __devinit start_secondary(void *cpuv
 {
         /* Setup the cpu */
         cpu_init();
+
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+	/*
+	 * Cosmetic: sleep_time won't be changed afterwards for the idle
+	 * thread (assumed to be current here);  keep it 0, not -cycles.
+	 */
+	VE_TASK_INFO(current)->sleep_time = 0;
+#endif
+
 	preempt_disable();
         /* init per CPU timer */
         init_cpu_timer();
@@ -826,6 +837,11 @@ void __init smp_prepare_cpus(unsigned in
 	for_each_cpu(cpu)
 		if (cpu != smp_processor_id())
 			smp_create_idle(cpu);
+
+#ifdef CONFIG_VE
+	/* TSC reset. kill whatever might rely on old values */
+	VE_TASK_INFO(current)->wakeup_stamp = 0;
+#endif
 }
 
 void __devinit smp_prepare_boot_cpu(void)
diff -upr linux-2.6.16.46-0.12.orig/arch/s390/kernel/syscalls.S linux-2.6.16.46-0.12-027test011/arch/s390/kernel/syscalls.S
--- linux-2.6.16.46-0.12.orig/arch/s390/kernel/syscalls.S	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/s390/kernel/syscalls.S	2007-08-28 17:35:30.000000000 +0400
@@ -312,3 +312,12 @@ SYSCALL(sys_faccessat,sys_faccessat,sys_
 SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper)
 SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper)
 SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper)
+
+.rept 410-(.-sys_call_table)/4
+	NI_SYSCALL
+.endr
+
+SYSCALL(sys_getluid, sys_getluid, sys_ni_syscall)	/* 410 */
+SYSCALL(sys_setluid, sys_setluid, sys_ni_syscall)
+SYSCALL(sys_setublimit, sys_setublimit, sys_ni_syscall)
+SYSCALL(sys_ubstat, sys_ubstat, sys_ni_syscall)
diff -upr linux-2.6.16.46-0.12.orig/arch/s390/mm/fault.c linux-2.6.16.46-0.12-027test011/arch/s390/mm/fault.c
--- linux-2.6.16.46-0.12.orig/arch/s390/mm/fault.c	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/s390/mm/fault.c	2007-08-28 17:35:29.000000000 +0400
@@ -114,17 +114,9 @@ void bust_spinlocks(int yes)
 	if (yes) {
 		oops_in_progress = 1;
 	} else {
-		int loglevel_save = console_loglevel;
 		console_unblank();
 		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk will give klogd
-		 * a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;
-		printk(" ");
-		console_loglevel = loglevel_save;
+		wake_up_klogd();
 	}
 }
 
diff -upr linux-2.6.16.46-0.12.orig/arch/s390/mm/init.c linux-2.6.16.46-0.12-027test011/arch/s390/mm/init.c
--- linux-2.6.16.46-0.12.orig/arch/s390/mm/init.c	2007-08-24 19:28:22.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/s390/mm/init.c	2007-08-28 17:35:31.000000000 +0400
@@ -91,6 +91,7 @@ void show_mem(void)
         printk("%d pages shared\n",shared);
         printk("%d pages swap cached\n",cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 /* References to section boundaries */
 
diff -upr linux-2.6.16.46-0.12.orig/arch/sh/kernel/kgdb_stub.c linux-2.6.16.46-0.12-027test011/arch/sh/kernel/kgdb_stub.c
--- linux-2.6.16.46-0.12.orig/arch/sh/kernel/kgdb_stub.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/sh/kernel/kgdb_stub.c	2007-08-28 17:35:31.000000000 +0400
@@ -412,7 +412,7 @@ static struct task_struct *get_thread(in
 	if (pid == PID_MAX) pid = 0;
 
 	/* First check via PID */
-	thread = find_task_by_pid(pid);
+	thread = find_task_by_pid_all(pid);
 
 	if (thread)
 		return thread;
diff -upr linux-2.6.16.46-0.12.orig/arch/sh64/kernel/process.c linux-2.6.16.46-0.12-027test011/arch/sh64/kernel/process.c
--- linux-2.6.16.46-0.12.orig/arch/sh64/kernel/process.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/sh64/kernel/process.c	2007-08-28 17:35:31.000000000 +0400
@@ -906,7 +906,7 @@ asids_proc_info(char *buf, char **start,
 	int len=0;
 	struct task_struct *p;
 	read_lock(&tasklist_lock);
-	for_each_process(p) {
+	for_each_process_ve(p) {
 		int pid = p->pid;
 		struct mm_struct *mm;
 		if (!pid) continue;
diff -upr linux-2.6.16.46-0.12.orig/arch/sparc64/Kconfig linux-2.6.16.46-0.12-027test011/arch/sparc64/Kconfig
--- linux-2.6.16.46-0.12.orig/arch/sparc64/Kconfig	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/sparc64/Kconfig	2007-08-28 17:35:31.000000000 +0400
@@ -130,6 +130,8 @@ config NR_CPUS
 	depends on SMP
 	default "32"
 
+source "kernel/Kconfig.fairsched"
+
 source "drivers/cpufreq/Kconfig"
 
 config US3_FREQ
@@ -394,8 +396,12 @@ endmenu
 
 source "arch/sparc64/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "kernel/ub/Kconfig"
diff -upr linux-2.6.16.46-0.12.orig/arch/sparc64/kernel/process.c linux-2.6.16.46-0.12-027test011/arch/sparc64/kernel/process.c
--- linux-2.6.16.46-0.12.orig/arch/sparc64/kernel/process.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/sparc64/kernel/process.c	2007-08-28 17:35:31.000000000 +0400
@@ -697,6 +697,13 @@ pid_t kernel_thread(int (*fn)(void *), v
 {
 	long retval;
 
+	/* Don't allow kernel_thread() inside VE */
+	if (!ve_is_super(get_exec_env())) {
+		printk("kernel_thread call inside VE\n");
+		dump_stack();
+		return -EPERM;
+	}
+
 	/* If the parent runs before fn(arg) is called by the child,
 	 * the input registers of this function can be clobbered.
 	 * So we stash 'fn' and 'arg' into global registers which
diff -upr linux-2.6.16.46-0.12.orig/arch/sparc64/kernel/setup.c linux-2.6.16.46-0.12-027test011/arch/sparc64/kernel/setup.c
--- linux-2.6.16.46-0.12.orig/arch/sparc64/kernel/setup.c	2007-08-24 19:28:29.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/sparc64/kernel/setup.c	2007-08-28 17:35:31.000000000 +0400
@@ -156,7 +156,7 @@ int prom_callback(long *args)
 			pte_t *ptep;
 			pte_t pte;
 
-			for_each_process(p) {
+			for_each_process_all(p) {
 				mm = p->mm;
 				if (CTX_NRBITS(mm->context) == ctx)
 					break;
diff -upr linux-2.6.16.46-0.12.orig/arch/sparc64/kernel/sys_sparc32.c linux-2.6.16.46-0.12-027test011/arch/sparc64/kernel/sys_sparc32.c
--- linux-2.6.16.46-0.12.orig/arch/sparc64/kernel/sys_sparc32.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/sparc64/kernel/sys_sparc32.c	2007-08-28 17:35:30.000000000 +0400
@@ -842,7 +842,7 @@ asmlinkage long sys32_utimes(char __user
 			return -EFAULT;
 	}
 
-	return do_utimes(AT_FDCWD, filename, (tvs ? &ktvs[0] : NULL));
+	return do_utimes(AT_FDCWD, filename, (tvs ? &ktvs[0] : NULL), 0);
 }
 
 /* These are here just in case some old sparc32 binary calls it. */
diff -upr linux-2.6.16.46-0.12.orig/arch/sparc64/kernel/traps.c linux-2.6.16.46-0.12-027test011/arch/sparc64/kernel/traps.c
--- linux-2.6.16.46-0.12.orig/arch/sparc64/kernel/traps.c	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/sparc64/kernel/traps.c	2007-08-28 17:35:31.000000000 +0400
@@ -1898,6 +1898,10 @@ void die_if_kernel(char *str, struct pt_
 "                 \\__U_/\n");
 
 	printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
+	printk("VE:EXCVE %d:%d, CPU %d, VCPU %d:%d\n",
+		VEID(VE_TASK_INFO(current)->owner_env), VEID(get_exec_env()),
+		smp_processor_id(),
+		task_vsched_id(current), task_cpu(current));
 	notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV);
 	__asm__ __volatile__("flushw");
 	__show_regs(regs);
diff -upr linux-2.6.16.46-0.12.orig/arch/sparc64/mm/init.c linux-2.6.16.46-0.12-027test011/arch/sparc64/mm/init.c
--- linux-2.6.16.46-0.12.orig/arch/sparc64/mm/init.c	2007-08-24 19:28:08.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/sparc64/mm/init.c	2007-08-28 17:35:31.000000000 +0400
@@ -416,6 +416,7 @@ static inline int in_obp_range(unsigned 
 	return (vaddr >= LOW_OBP_ADDRESS &&
 		vaddr < HI_OBP_ADDRESS);
 }
+EXPORT_SYMBOL(show_mem);
 
 static int cmp_ptrans(const void *a, const void *b)
 {
diff -upr linux-2.6.16.46-0.12.orig/arch/um/drivers/mconsole_kern.c linux-2.6.16.46-0.12-027test011/arch/um/drivers/mconsole_kern.c
--- linux-2.6.16.46-0.12.orig/arch/um/drivers/mconsole_kern.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/um/drivers/mconsole_kern.c	2007-08-28 17:35:31.000000000 +0400
@@ -600,7 +600,7 @@ static void do_stack_trace(struct mc_req
 
 	from = current;
 
-	to = find_task_by_pid(pid_requested);
+	to = find_task_by_pid_all(pid_requested);
 	if((to == NULL) || (pid_requested == 0)) {
 		mconsole_reply(req, "Couldn't find that pid", 1, 0);
 		return;
diff -upr linux-2.6.16.46-0.12.orig/arch/um/kernel/skas/process_kern.c linux-2.6.16.46-0.12-027test011/arch/um/kernel/skas/process_kern.c
--- linux-2.6.16.46-0.12.orig/arch/um/kernel/skas/process_kern.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/um/kernel/skas/process_kern.c	2007-08-28 17:35:31.000000000 +0400
@@ -197,7 +197,7 @@ void kill_off_processes_skas(void)
 		int pid, me;
 
 		me = os_getpid();
-		for_each_process(p){
+		for_each_process_all(p){
 			if(p->mm == NULL)
 				continue;
 
diff -upr linux-2.6.16.46-0.12.orig/arch/um/kernel/tt/process_kern.c linux-2.6.16.46-0.12-027test011/arch/um/kernel/tt/process_kern.c
--- linux-2.6.16.46-0.12.orig/arch/um/kernel/tt/process_kern.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/um/kernel/tt/process_kern.c	2007-08-28 17:35:31.000000000 +0400
@@ -301,7 +301,7 @@ void kill_off_processes_tt(void)
 	int me;
 
 	me = os_getpid();
-        for_each_process(p){
+	for_each_process_all(p){
 		if(p->thread.mode.tt.extern_pid != me) 
 			os_kill_process(p->thread.mode.tt.extern_pid, 0);
 	}
@@ -444,7 +444,7 @@ int is_valid_pid(int pid)
 	struct task_struct *task;
 
         read_lock(&tasklist_lock);
-        for_each_process(task){
+	for_each_process_all(task){
                 if(task->thread.mode.tt.extern_pid == pid){
 			read_unlock(&tasklist_lock);
 			return(1);
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/Kconfig linux-2.6.16.46-0.12-027test011/arch/x86_64/Kconfig
--- linux-2.6.16.46-0.12.orig/arch/x86_64/Kconfig	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/Kconfig	2007-08-28 17:35:34.000000000 +0400
@@ -283,6 +283,8 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
+source "kernel/Kconfig.fairsched"
+
 source "kernel/Kconfig.preempt"
 
 config NUMA
@@ -678,6 +680,8 @@ endmenu
 
 source "arch/x86_64/Kconfig.debug"
 
+source "kernel/Kconfig.openvz"
+
 source "security/Kconfig"
 
 source "crypto/Kconfig"
@@ -685,3 +689,5 @@ source "crypto/Kconfig"
 source "drivers/xen/Kconfig"
 
 source "lib/Kconfig"
+
+source "kernel/ub/Kconfig"
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/boot/compressed/head.S linux-2.6.16.46-0.12-027test011/arch/x86_64/boot/compressed/head.S
--- linux-2.6.16.46-0.12.orig/arch/x86_64/boot/compressed/head.S	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/boot/compressed/head.S	2007-08-28 17:35:33.000000000 +0400
@@ -34,7 +34,7 @@
 startup_32:
 	cld
 	cli
-	movl $(__KERNEL_DS),%eax
+	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
 	movl %eax,%fs
@@ -76,7 +76,7 @@ startup_32:
 	jnz  3f
 	addl $8,%esp
 	xorl %ebx,%ebx
-	ljmp $(__KERNEL_CS), $__PHYSICAL_START
+	ljmp $(__BOOT_CS), $__PHYSICAL_START
 
 /*
  * We come here, if we were loaded high.
@@ -104,7 +104,7 @@ startup_32:
 	popl %eax	# hcount
 	movl $__PHYSICAL_START,%edi
 	cli		# make sure we don't get interrupted
-	ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
+	ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
 
 /*
  * Routine (template) for moving the decompressed kernel in place,
@@ -127,7 +127,7 @@ move_routine_start:
 	movsl
 	movl %ebx,%esi	# Restore setup pointer
 	xorl %ebx,%ebx
-	ljmp $(__KERNEL_CS), $__PHYSICAL_START
+	ljmp $(__BOOT_CS), $__PHYSICAL_START
 move_routine_end:
 
 
@@ -137,5 +137,5 @@ user_stack:	 	
 	.fill 4096,4,0
 stack_start:	
 	.long user_stack+4096
-	.word __KERNEL_DS
+	.word __BOOT_DS
 
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/boot/setup.S linux-2.6.16.46-0.12-027test011/arch/x86_64/boot/setup.S
--- linux-2.6.16.46-0.12.orig/arch/x86_64/boot/setup.S	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/boot/setup.S	2007-08-28 17:35:33.000000000 +0400
@@ -729,7 +729,7 @@ flush_instr:
 	subw	$DELTA_INITSEG, %si
 	shll	$4, %esi			# Convert to 32-bit pointer
 # NOTE: For high loaded big kernels we need a
-#	jmpi    0x100000,__KERNEL_CS
+#	jmpi    0x100000,__BOOT_CS
 #
 #	but we yet haven't reloaded the CS register, so the default size 
 #	of the target offset still is 16 bit.
@@ -740,7 +740,7 @@ flush_instr:
 	.byte 0x66, 0xea			# prefix + jmpi-opcode
 code32:	.long	0x1000				# will be set to 0x100000
 						# for big kernels
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 # Here's a bunch of information about your current kernel..
 kernel_version:	.ascii	UTS_RELEASE
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/ia32_aout.c linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/ia32_aout.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/ia32_aout.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/ia32_aout.c	2007-08-28 17:35:31.000000000 +0400
@@ -347,14 +347,14 @@ static int load_aout_binary(struct linux
 		if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
 		    (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
 		{
-			printk(KERN_NOTICE "executable not page aligned\n");
+			ve_printk(VE_LOG, KERN_NOTICE "executable not page aligned\n");
 			error_time2 = jiffies;
 		}
 
 		if ((fd_offset & ~PAGE_MASK) != 0 &&
 		    (jiffies-error_time) > 5*HZ)
 		{
-			printk(KERN_WARNING 
+			ve_printk(VE_LOG, KERN_WARNING
 			       "fd_offset is not page aligned. Please convert program: %s\n",
 			       bprm->file->f_dentry->d_name.name);
 			error_time = jiffies;
@@ -467,7 +467,7 @@ static int load_aout_library(struct file
 		static unsigned long error_time;
 		if ((jiffies-error_time) > 5*HZ)
 		{
-			printk(KERN_WARNING 
+			ve_printk(VE_LOG, KERN_WARNING
 			       "N_TXTOFF is not page aligned. Please convert library: %s\n",
 			       file->f_dentry->d_name.name);
 			error_time = jiffies;
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/ia32_binfmt.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/ia32_binfmt.c	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/ia32_binfmt.c	2007-08-28 17:35:33.000000000 +0400
@@ -27,12 +27,14 @@
 #include <asm/ia32.h>
 #include <asm/vsyscall32.h>
 
+#include <ub/ub_vmpages.h>
+
 #define ELF_NAME "elf/i386"
 
 #define AT_SYSINFO 32
 #define AT_SYSINFO_EHDR		33
 
-int sysctl_vsyscall32 = 1;
+int sysctl_vsyscall32 = 0;
 
 #define ARCH_DLINFO do {  \
 	if (sysctl_vsyscall32) { \
@@ -347,9 +349,15 @@ int ia32_setup_arg_pages(struct linux_bi
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
+	ret = -ENOMEM;
+	if (ub_memory_charge(mm, stack_top -
+				(PAGE_MASK & (unsigned long)bprm->p),
+				VM_STACK_FLAGS, NULL, UB_SOFT))
+		goto err_charge;
+
 	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 	if (!mpnt) 
-		return -ENOMEM; 
+		goto err_alloc;
 
 	memset(mpnt, 0, sizeof(*mpnt));
 
@@ -366,11 +374,8 @@ int ia32_setup_arg_pages(struct linux_bi
 			mpnt->vm_flags = VM_STACK_FLAGS;
  		mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? 
  			PAGE_COPY_EXEC : PAGE_COPY;
-		if ((ret = insert_vm_struct(mm, mpnt))) {
-			up_write(&mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, mpnt);
-			return ret;
-		}
+		if ((ret = insert_vm_struct(mm, mpnt)))
+			goto err_insert;
 		mm->stack_vm = mm->total_vm = vma_pages(mpnt);
 	} 
 
@@ -385,6 +390,15 @@ int ia32_setup_arg_pages(struct linux_bi
 	up_write(&mm->mmap_sem);
 	
 	return 0;
+
+err_insert:
+	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, mpnt);
+err_alloc:
+	ub_memory_uncharge(mm, stack_top - (PAGE_MASK & (unsigned long)bprm->p),
+			VM_STACK_FLAGS, NULL);
+err_charge:
+	return ret;
 }
 EXPORT_SYMBOL(ia32_setup_arg_pages);
 
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/ia32_signal.c linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/ia32_signal.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/ia32_signal.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/ia32_signal.c	2007-08-28 17:35:33.000000000 +0400
@@ -39,7 +39,6 @@
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
@@ -118,22 +117,17 @@ asmlinkage long
 sys32_sigsuspend(int history0, int history1, old_sigset_t mask,
 		 struct pt_regs *regs)
 {
-	sigset_t saveset;
-
 	mask &= _BLOCKABLE;
 	spin_lock_irq(&current->sighand->siglock);
-	saveset = current->blocked;
+	current->saved_sigmask = current->blocked;
 	siginitset(&current->blocked, mask);
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	regs->rax = -EINTR;
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (do_signal(regs, &saveset))
-			return -EINTR;
-	}
+	current->state = TASK_INTERRUPTIBLE;
+	schedule();
+	set_thread_flag(TIF_RESTORE_SIGMASK);
+	return -ERESTARTNOHAND;
 }
 
 asmlinkage long
@@ -510,11 +504,11 @@ int ia32_setup_frame(int sig, struct k_s
 		current->comm, current->pid, frame, regs->rip, frame->pretcode);
 #endif
 
-	return 1;
+	return 0;
 
 give_sigsegv:
 	force_sigsegv(sig, current);
-	return 0;
+	return -EFAULT;
 }
 
 int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@@ -606,9 +600,9 @@ int ia32_setup_rt_frame(int sig, struct 
 		current->comm, current->pid, frame, regs->rip, frame->pretcode);
 #endif
 
-	return 1;
+	return 0;
 
 give_sigsegv:
 	force_sigsegv(sig, current);
-	return 0;
+	return -EFAULT;
 }
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/ia32entry.S linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/ia32entry.S
--- linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/ia32entry.S	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/ia32entry.S	2007-08-28 17:35:30.000000000 +0400
@@ -492,7 +492,7 @@ ia32_sys_call_table:
 	.quad stub32_iopl		/* 110 */
 	.quad sys_vhangup
 	.quad quiet_ni_syscall	/* old "idle" system call */
-	.quad sys32_vm86_warning	/* vm86old */ 
+	.quad quiet_ni_syscall	/* vm86old */ 
 	.quad compat_sys_wait4
 	.quad sys_swapoff		/* 115 */
 	.quad sys32_sysinfo
@@ -510,7 +510,7 @@ ia32_sys_call_table:
 	.quad sys_init_module
 	.quad sys_delete_module
 	.quad quiet_ni_syscall		/* 130  get_kernel_syms */
-	.quad sys_quotactl
+	.quad sys32_quotactl
 	.quad sys_getpgid
 	.quad sys_fchdir
 	.quad quiet_ni_syscall	/* bdflush */
@@ -545,7 +545,7 @@ ia32_sys_call_table:
 	.quad sys_mremap
 	.quad sys_setresuid16
 	.quad sys_getresuid16	/* 165 */
-	.quad sys32_vm86_warning	/* vm86 */ 
+	.quad quiet_ni_syscall	/* vm86 */ 
 	.quad quiet_ni_syscall	/* query_module */
 	.quad sys_poll
 	.quad compat_sys_nfsservctl
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/sys_ia32.c linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/sys_ia32.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/sys_ia32.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/sys_ia32.c	2007-08-28 17:35:31.000000000 +0400
@@ -527,7 +527,7 @@ int sys32_ni_syscall(int call)
 	static char lastcomm[sizeof(me->comm)];
 
 	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		printk(KERN_INFO "IA32 syscall %d from %s not implemented\n",
+		ve_printk(VE_LOG, KERN_INFO "IA32 syscall %d from %s not implemented\n",
 		       call, me->comm);
 		strncpy(lastcomm, me->comm, sizeof(lastcomm));
 	} 
@@ -890,13 +890,13 @@ asmlinkage long sys32_olduname(struct ol
   
   	down_read(&uts_sem);
 	
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+	error = __copy_to_user(&name->sysname,&ve_utsname.sysname,__OLD_UTS_LEN);
 	 __put_user(0,name->sysname+__OLD_UTS_LEN);
-	 __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+	 __copy_to_user(&name->nodename,&ve_utsname.nodename,__OLD_UTS_LEN);
 	 __put_user(0,name->nodename+__OLD_UTS_LEN);
-	 __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+	 __copy_to_user(&name->release,&ve_utsname.release,__OLD_UTS_LEN);
 	 __put_user(0,name->release+__OLD_UTS_LEN);
-	 __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+	 __copy_to_user(&name->version,&ve_utsname.version,__OLD_UTS_LEN);
 	 __put_user(0,name->version+__OLD_UTS_LEN);
 	 { 
 		 char *arch = "x86_64";
@@ -919,7 +919,7 @@ long sys32_uname(struct old_utsname __us
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err=copy_to_user(name, &ve_utsname, sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32) 
 		err |= copy_to_user(&name->machine, "i686", 5);
@@ -1000,18 +1000,6 @@ long sys32_fadvise64_64(int fd, __u32 of
 			       advice); 
 } 
 
-long sys32_vm86_warning(void)
-{ 
-	struct task_struct *me = current;
-	static char lastcomm[sizeof(me->comm)];
-	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
-		       me->comm);
-		strncpy(lastcomm, me->comm, sizeof(lastcomm));
-	} 
-	return -ENOSYS;
-} 
-
 long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
 			  char __user * buf, size_t len)
 {
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/syscall32.c linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/syscall32.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/ia32/syscall32.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/ia32/syscall32.c	2007-08-28 17:35:30.000000000 +0400
@@ -14,6 +14,8 @@
 #include <asm/tlbflush.h>
 #include <asm/ia32_unistd.h>
 
+#include <ub/ub_vmpages.h>
+
 extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
 extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
 extern int sysctl_vsyscall32;
@@ -47,32 +49,45 @@ int syscall32_setup_pages(struct linux_b
 	int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
+	unsigned long flags;
 	int ret;
 
+	flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC | VM_MAYWRITE |
+		mm->def_flags;
+
+	ret = -ENOMEM;
+	if (ub_memory_charge(mm, VSYSCALL32_END - VSYSCALL32_BASE,
+			flags, NULL, UB_SOFT))
+		goto err_charge;
+
 	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 	if (!vma)
-		return -ENOMEM;
+		goto err_alloc;
 
 	memset(vma, 0, sizeof(struct vm_area_struct));
 	/* Could randomize here */
 	vma->vm_start = VSYSCALL32_BASE;
 	vma->vm_end = VSYSCALL32_END;
 	/* MAYWRITE to allow gdb to COW and set breakpoints */
-	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
-	vma->vm_flags |= mm->def_flags;
+	vma->vm_flags = flags;
 	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
 	vma->vm_ops = &syscall32_vm_ops;
 	vma->vm_mm = mm;
 
 	down_write(&mm->mmap_sem);
-	if ((ret = insert_vm_struct(mm, vma))) {
-		up_write(&mm->mmap_sem);
-		kmem_cache_free(vm_area_cachep, vma);
-		return ret;
-	}
+	if ((ret = insert_vm_struct(mm, vma)))
+		goto err_ins;
 	mm->total_vm += npages;
 	up_write(&mm->mmap_sem);
 	return 0;
+
+err_ins:
+	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, vma);
+err_alloc:
+	ub_memory_uncharge(mm, VSYSCALL32_END - VSYSCALL32_BASE, flags, NULL);
+err_charge:
+	return ret;
 }
 
 static int __init init_syscall32(void)
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/acpi/wakeup.S linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/acpi/wakeup.S
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/acpi/wakeup.S	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/acpi/wakeup.S	2007-08-28 17:35:33.000000000 +0400
@@ -77,7 +77,7 @@ wakeup_code:
 
 	.byte 0x66, 0xea			# prefix + jmpi-opcode
 	.long	wakeup_32 - __START_KERNEL_map
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 	.code32
 wakeup_32:
@@ -96,13 +96,13 @@ wakeup_32:
 	jnc	bogus_cpu
 	movl	%edx,%edi
 	
-	movw	$__KERNEL_DS, %ax
+	movw	$__BOOT_DS, %ax
 	movw	%ax, %ds
 	movw	%ax, %es
 	movw	%ax, %fs
 	movw	%ax, %gs
 
-	movw	$__KERNEL_DS, %ax	
+	movw	$__BOOT_DS, %ax
 	movw	%ax, %ss
 
 	mov	$(wakeup_stack - __START_KERNEL_map), %esp
@@ -187,7 +187,7 @@ reach_compatibility_mode:
 
 wakeup_jumpvector:
 	.long	wakeup_long64 - __START_KERNEL_map
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 .code64
 
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/entry.S linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/entry.S
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/entry.S	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/entry.S	2007-08-28 17:35:36.000000000 +0400
@@ -141,7 +141,12 @@ ENTRY(ret_from_fork)
 	popf				# reset kernel eflags
 	CFI_ADJUST_CFA_OFFSET -4
 	call schedule_tail
+ret_from_fork_tail:
 	GET_THREAD_INFO(%rcx)
+	btr $TIF_RESUME,threadinfo_flags(%rcx)
+	jc  x86_64_ret_from_resume
+
+ret_from_fork_check:
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
 	jnz rff_trace
 rff_action:	
@@ -157,6 +162,19 @@ rff_trace:
 	call syscall_trace_leave
 	GET_THREAD_INFO(%rcx)	
 	jmp rff_action
+
+x86_64_ret_from_resume:
+	movq (%rsp),%rax
+	testq %rax,%rax
+	jz 1f
+	movq  %rsp,%rdi
+	call  *%rax
+1:
+	addq $256,%rsp
+	cmpq $0,ORIG_RAX(%rsp)
+	jge  ret_from_fork_tail
+	RESTORE_REST
+	jmp  int_ret_from_sys_call
 	CFI_ENDPROC
 
 /*
@@ -252,7 +270,7 @@ sysret_careful:
 	/* Handle a signal */ 
 sysret_signal:
 	sti
-	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    1f
 
 	/* Really a signal */
@@ -357,7 +375,7 @@ int_very_careful:
 	jmp int_restore_rest
 	
 int_signal:
-	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_NOTIFY_RESUME|_TIF_RESTORE_SIGMASK|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
 	jz 1f
 	movq %rsp,%rdi		# &ptregs -> arg1
 	xorl %esi,%esi		# oldset -> arg2
@@ -565,7 +583,7 @@ retint_careful:
 	jmp retint_check
 	
 retint_signal:
-	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    retint_swapgs
 	sti
 	SAVE_REST
@@ -836,7 +854,7 @@ ENTRY(kernel_thread)
 	xorl %r9d,%r9d
 	
 	# clone now
-	call do_fork
+	call do_fork_kthread
 	movq %rax,RAX(%rsp)
 	xorl %edi,%edi
 
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/head.S linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/head.S
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/head.S	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/head.S	2007-08-28 17:35:33.000000000 +0400
@@ -40,7 +40,7 @@ startup_32:
 	 */
 
 	/* Initialize the %ds segment register */
-	movl $__KERNEL_DS,%eax
+	movl $__BOOT_DS,%eax
 	movl %eax,%ds
 
 	/* Load new GDT with the 64bit segments using 32bit descriptor */
@@ -183,7 +183,14 @@ startup_64:
 	/* esi is pointer to real mode structure with interesting info.
 	   pass it to C */
 	movl	%esi, %edi
-	
+
+	/* Switch to __KERNEL_CS. The segment is the same, but selector
+	 * is different. */
+	pushq	$__KERNEL_CS
+	pushq	$switch_cs
+	lretq
+switch_cs:
+
 	/* Finally jump to run C code and to be on real kernel address
 	 * Since we are running on identity-mapped space we have to jump
 	 * to the full 64bit address , this is only possible as indirect
@@ -243,7 +250,7 @@ pGDT32:
 .org 0xf10	
 ljumpvector:
 	.long	startup_64-__START_KERNEL_map
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 ENTRY(stext)
 ENTRY(_stext)
@@ -355,21 +362,30 @@ gdt:
 .align PAGE_SIZE
 
 /* The TLS descriptors are currently at a different place compared to i386.
-   Hopefully nobody expects them at a fixed place (Wine?) */
+   Hopefully nobody expects them at a fixed place (Wine?)
+   Descriptors are rearranged to place the 32bit and TLS selectors in the
+   same places, because it is really necessary. sysret/exit mandates the
+   order of kernel/user cs/ds, so we have to extend the gdt.
+*/
 	
 ENTRY(cpu_gdt_table)
-	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x0			/* unused */
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
-	.quad	0x00cffa000000ffff	/* __USER32_CS */
-	.quad	0x00cff2000000ffff	/* __USER_DS, __USER32_DS  */		
-	.quad	0x00affa000000ffff	/* __USER_CS */
-	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
-	.quad	0,0			/* TSS */
-	.quad	0,0			/* LDT */
-	.quad   0,0,0			/* three TLS descriptors */ 
-	.quad	0			/* unused */
+	.quad	0x0000000000000000	/* 0 NULL descriptor */
+	.quad	0x0			/* 1 unused */
+	.quad	0x00af9a000000ffff	/* 2 __BOOT_CS */
+	.quad	0x00cf92000000ffff	/* 3 __BOOT_DS */
+	.quad	0,0			/* 4,5 TSS */
+	.quad   0,0,0			/* 6-8 three TLS descriptors */
+	.quad	0,0			/* 9,10 LDT */
+	.quad	0x00cf9a000000ffff	/* 11 __KERNEL32_CS */
+	.quad	0x00af9a000000ffff	/* 12 __KERNEL_CS */
+	.quad	0x00cf92000000ffff	/* 13 __KERNEL_DS */
+	.quad	0x00cffa000000ffff	/* 14 __USER32_CS */
+	.quad	0x00cff2000000ffff	/* 15 __USER_DS, __USER32_DS  */
+	.quad	0x00affa000000ffff	/* 16 __USER_CS */
+	.quad	0x0			/* 17 unused */
+	.quad	0,0,0,0,0,0
+	.quad	0,0,0,0,0,0,0,0
+
 gdt_end:	
 	/* asm/segment.h:GDT_ENTRIES must match this */	
 	/* This should be a multiple of the cache line size */
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/ldt.c linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/ldt.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/ldt.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/ldt.c	2007-08-28 17:35:33.000000000 +0400
@@ -16,6 +16,7 @@
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -23,6 +24,8 @@
 #include <asm/desc.h>
 #include <asm/proto.h>
 
+#include <ub/ub_mem.h>
+
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)
 {
@@ -42,9 +45,9 @@ static int alloc_ldt(mm_context_t *pc, u
 	oldsize = pc->size;
 	mincount = (mincount+511)&(~511);
 	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+		newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE);
 	else
-		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+		newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
 
 	if (!newldt)
 		return -ENOMEM;
@@ -109,6 +112,7 @@ int init_new_context(struct task_struct 
 	}
 	return retval;
 }
+EXPORT_SYMBOL_GPL(init_new_context);
 
 /*
  * 
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/nmi.c linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/nmi.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/nmi.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/nmi.c	2007-08-28 17:35:29.000000000 +0400
@@ -557,6 +557,7 @@ static __kprobes int dummy_nmi_callback(
 }
  
 static nmi_callback_t nmi_callback = dummy_nmi_callback;
+static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback;
  
 asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
 {
@@ -566,9 +567,21 @@ asmlinkage __kprobes void do_nmi(struct 
 	add_pda(__nmi_count,1);
 	if (!rcu_dereference(nmi_callback)(regs, cpu))
 		default_do_nmi(regs);
+
+	nmi_ipi_callback(regs, cpu);
 	nmi_exit();
 }
 
+void set_nmi_ipi_callback(nmi_callback_t callback)
+{
+	nmi_ipi_callback = callback;	/* hook do_nmi() runs after its normal NMI handling */
+}
+
+void unset_nmi_ipi_callback(void)
+{
+	nmi_ipi_callback = dummy_nmi_callback;	/* back to the initial value */
+}
+
 void set_nmi_callback(nmi_callback_t callback)
 {
 	rcu_assign_pointer(nmi_callback, callback);
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/process.c linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/process.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/process.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/process.c	2007-08-28 17:35:36.000000000 +0400
@@ -37,6 +37,7 @@
 #include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
+#include <linux/sysctl.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -52,13 +53,17 @@
 #include <asm/ia32.h>
 #include <asm/idle.h>
 
-asmlinkage extern void ret_from_fork(void);
+asmlinkage extern void execve(void);
+EXPORT_SYMBOL_GPL(execve);
 
 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
 
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 
+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+EXPORT_SYMBOL(ret_from_fork);
+
 /*
  * Powermanagement idle function, if any..
  */
@@ -302,7 +307,8 @@ void __show_regs(struct pt_regs * regs)
 		(int)strcspn(system_utsname.version, " "),
 		system_utsname.version);
 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
-	printk_address(regs->rip); 
+	if (decode_call_traces)
+		printk_address(regs->rip);
 	printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
 		regs->eflags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
@@ -339,11 +345,26 @@ void __show_regs(struct pt_regs * regs)
 
 void show_regs(struct pt_regs *regs)
 {
-	printk("CPU %d:", smp_processor_id());
+	printk("CPU %d, VCPU %d:%d", smp_processor_id(), task_vsched_id(current), task_cpu(current));
 	__show_regs(regs);
 	show_trace(&regs->rsp);
 }
 
+void smp_show_regs(struct pt_regs *regs, void *data)
+{
+	/* serializes banner + show_regs() output between concurrent callers; data is unused */
+	static DEFINE_SPINLOCK(regs_dump_lock);
+
+	if (regs != NULL) {
+		bust_spinlocks(1);
+		spin_lock(&regs_dump_lock);
+		printk("----------- IPI show regs -----------\n");
+		show_regs(regs);
+		spin_unlock(&regs_dump_lock);
+		bust_spinlocks(0);
+	}
+}
+
 /*
  * Free current thread data structures etc..
  */
@@ -835,3 +856,20 @@ unsigned long arch_align_stack(unsigned 
 		sp -= get_random_int() % 8192;
 	return sp & ~0xf;
 }
+
+long do_fork_kthread(unsigned long clone_flags, unsigned long stack_start,
+		     struct pt_regs *regs, unsigned long stack_size,
+		     int __user *parent_tidptr, int __user *child_tidptr)
+{
+	/* Gate called by the kernel_thread entry code in place of do_fork():
+	 * refuse unless ve_allow_kthreads is set or the caller's VE is super. */
+	if (!ve_allow_kthreads && !ve_is_super(get_exec_env())) {
+		/* Don't allow kernel_thread() inside VE */
+		printk("kernel_thread call inside VE\n");
+		dump_stack();
+		return -EPERM;
+	}
+
+	return do_fork(clone_flags, stack_start, regs, stack_size,
+		       parent_tidptr, child_tidptr);
+}
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/ptrace.c linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/ptrace.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/ptrace.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/ptrace.c	2007-08-28 17:35:33.000000000 +0400
@@ -300,6 +300,15 @@ static unsigned long getreg(struct task_
 			return child->thread.fs;
 		case offsetof(struct user_regs_struct, gs_base):
 			return child->thread.gs;
+		case offsetof(struct user_regs_struct, cs):
+			if (test_tsk_thread_flag(child, TIF_SYSCALL_TRACE)) {
+				val = get_stack_long(child, regno - sizeof(struct pt_regs));
+				if (val == __USER_CS)
+					return 0x33;
+				if (val == __USER32_CS)
+					return 0x23;
+			}
+			/* fall through */
 		default:
 			regno = regno - sizeof(struct pt_regs);
 			val = get_stack_long(child, regno);
@@ -581,8 +590,10 @@ static void syscall_trace(struct pt_regs
 	       current_thread_info()->flags, current->ptrace); 
 #endif
 
+	set_pn_state(current, (regs->rax != -ENOSYS) ? PN_STOP_LEAVE : PN_STOP_ENTRY);
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
 				? 0x80 : 0));
+	clear_pn_state(current);
 	/*
 	 * this isn't the same as continuing with a signal, but it will do
 	 * for normal use.  strace only continues with a signal if the
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/setup.c linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/setup.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/setup.c	2007-08-24 19:28:32.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/setup.c	2007-08-28 17:35:34.000000000 +0400
@@ -46,6 +46,7 @@
 #include <linux/cpufreq.h>
 #include <linux/dmi.h>
 #include <linux/dma-mapping.h>
+#include <linux/vsched.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -1337,7 +1338,7 @@ static int show_cpuinfo(struct seq_file 
 
 
 #ifdef CONFIG_SMP
-	if (!cpu_online(c-cpu_data))
+	if (!vcpu_online(c - cpu_data))
 		return 0;
 #endif
 
@@ -1358,9 +1359,13 @@ static int show_cpuinfo(struct seq_file 
 		seq_printf(m, "stepping\t: unknown\n");
 	
 	if (cpu_has(c,X86_FEATURE_TSC)) {
+#ifndef CONFIG_FAIRSCHED
 		unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
 		if (!freq)
 			freq = cpu_khz;
+#else
+		unsigned int freq = (unsigned int)ve_scale_khz(cpu_khz);
+#endif
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
 			     freq / 1000, (freq % 1000));
 	}
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/setup64.c linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/setup64.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/setup64.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/setup64.c	2007-08-28 17:35:33.000000000 +0400
@@ -294,3 +294,5 @@ void __cpuinit cpu_init (void)
 
 	raw_local_save_flags(kernel_eflags);
 }
+
+EXPORT_SYMBOL_GPL(cpu_gdt_descr);
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/signal.c linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/signal.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/signal.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/signal.c	2007-08-28 17:35:33.000000000 +0400
@@ -40,37 +40,6 @@ int ia32_setup_frame(int sig, struct k_s
             sigset_t *set, struct pt_regs * regs); 
 
 asmlinkage long
-sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs)
-{
-	sigset_t saveset, newset;
-
-	/* XXX: Don't preclude handling different sized sigset_t's.  */
-	if (sigsetsize != sizeof(sigset_t))
-		return -EINVAL;
-
-	if (copy_from_user(&newset, unewset, sizeof(newset)))
-		return -EFAULT;
-	sigdelsetmask(&newset, ~_BLOCKABLE);
-
-	spin_lock_irq(&current->sighand->siglock);
-	saveset = current->blocked;
-	current->blocked = newset;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-#ifdef DEBUG_SIG
-	printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
-		saveset, newset, regs, regs->rip);
-#endif 
-	regs->rax = -EINTR;
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (do_signal(regs, &saveset))
-			return -EINTR;
-	}
-}
-
-asmlinkage long
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs)
 {
@@ -344,11 +313,11 @@ static int setup_rt_frame(int sig, struc
 		current->comm, current->pid, frame, regs->rip, frame->pretcode);
 #endif
 
-	return 1;
+	return 0;
 
 give_sigsegv:
 	force_sigsegv(sig, current);
-	return 0;
+	return -EFAULT;
 }
 
 /*
@@ -411,7 +380,7 @@ handle_signal(unsigned long sig, siginfo
 #endif
 	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
-	if (ret) {
+	if (ret == 0) {
 		spin_lock_irq(&current->sighand->siglock);
 		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
 		if (!(ka->sa.sa_flags & SA_NODEFER))
@@ -428,9 +397,10 @@ handle_signal(unsigned long sig, siginfo
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
  * mistake.
  */
-int do_signal(struct pt_regs *regs, sigset_t *oldset)
+static void do_signal(struct pt_regs *regs)
 {
 	struct k_sigaction ka;
+	sigset_t *oldset;
 	siginfo_t info;
 	int signr;
 
@@ -441,12 +411,14 @@ int do_signal(struct pt_regs *regs, sigs
 	 * if so.
 	 */
 	if (!user_mode(regs))
-		return 1;
+		return;
 
-	if (try_to_freeze())
+	if (try_to_freeze() && !signal_pending(current))
 		goto no_signal;
 
-	if (!oldset)
+	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+		oldset = &current->saved_sigmask;
+	else
 		oldset = &current->blocked;
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
@@ -460,7 +432,15 @@ int do_signal(struct pt_regs *regs, sigs
 			set_debugreg(current->thread.debugreg7, 7);
 
 		/* Whee!  Actually deliver the signal.  */
-		return handle_signal(signr, &info, &ka, oldset, regs);
+		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+			/* a signal was successfully delivered; the saved
+			 * sigmask will have been stored in the signal frame,
+			 * and will be restored by sigreturn, so we can simply
+			 * clear the TIF_RESTORE_SIGMASK flag */
+			if (test_thread_flag(TIF_RESTORE_SIGMASK))
+				clear_thread_flag(TIF_RESTORE_SIGMASK);
+		}
+		return;
 	}
 
  no_signal:
@@ -481,10 +461,16 @@ int do_signal(struct pt_regs *regs, sigs
 			regs->rip -= 2;
 		}
 	}
-	return 0;
+
+	/* if there's no signal to deliver, we just put the saved sigmask
+	 * back */
+	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+		clear_thread_flag(TIF_RESTORE_SIGMASK);
+		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+	}
 }
 
-void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags)
+void do_notify_resume(struct pt_regs *regs, sigset_t *unused, __u32 thread_info_flags)
 {
 #ifdef DEBUG_SIG
 	printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n",
@@ -498,8 +484,8 @@ void do_notify_resume(struct pt_regs *re
 	}
 
 	/* deal with pending signal delivery */
-	if (thread_info_flags & _TIF_SIGPENDING)
-		do_signal(regs,oldset);
+	if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
+		do_signal(regs);
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/smp.c linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/smp.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/smp.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/smp.c	2007-08-28 17:35:36.000000000 +0400
@@ -30,6 +30,7 @@
 #include <asm/proto.h>
 #include <asm/apicdef.h>
 #include <asm/idle.h>
+#include <asm/nmi.h>
 
 /*
  *	Smarter SMP flushing macros. 
@@ -247,6 +248,8 @@ void flush_tlb_mm (struct mm_struct * mm
 	preempt_enable();
 }
 
+EXPORT_SYMBOL(flush_tlb_mm);
+
 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -465,6 +468,84 @@ void stop_this_cpu(void* dummy)
 		asm("hlt");
 }
 
+static spinlock_t nmi_call_lock = SPIN_LOCK_UNLOCKED;	/* held by the initiator for one whole NMI call */
+static struct nmi_call_data_struct {
+	smp_nmi_function func;	/* function each called CPU runs */
+	void *info;		/* second argument handed to func */
+	atomic_t started;	/* CPUs that picked up the request */
+	atomic_t finished;	/* CPUs done with func; bumped only if wait */
+	cpumask_t cpus_called;	/* CPUs that must not run func (again) */
+	int wait;		/* nonzero: initiator spins on finished */
+} *nmi_call_data;		/* request being served; set by smp_nmi_call_function() */
+
+static int smp_nmi_callback(struct pt_regs * regs, int cpu)
+{
+	smp_nmi_function func;
+	void *info;
+	int wait;
+	/* NMI-side half of smp_nmi_call_function(): snapshot the request first */
+	func = nmi_call_data->func;
+	info = nmi_call_data->info;
+	wait = nmi_call_data->wait;
+	ack_APIC_irq();
+	/* prevent func() from being called more than once on this CPU */
+	if (cpu_test_and_set(cpu, nmi_call_data->cpus_called))
+		return 0;
+	/*
+	 * notify initiating CPU that I've grabbed the data and am
+	 * about to execute the function
+	 */
+	mb();
+	atomic_inc(&nmi_call_data->started);
+	/* if !wait the initiator may return now, so nmi_call_data may be stale */
+	irq_enter();
+	func(regs, info);
+	irq_exit();
+	if (wait)
+		atomic_inc(&nmi_call_data->finished);
+
+	return 0;
+}
+
+int smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
+{
+	struct nmi_call_data_struct data;	/* request published to the other CPUs */
+	int cpus;
+	/* Run func(regs, info) on every other online CPU by sending them an NMI. */
+	cpus = num_online_cpus() - 1;
+	if (!cpus)
+		return 0;	/* no other CPU to call */
+
+	data.func = func;
+	data.info = info;
+	data.wait = wait;
+	atomic_set(&data.started, 0);
+	atomic_set(&data.finished, 0);
+	cpus_clear(data.cpus_called);
+	/* prevent this cpu from calling func if NMI happens */
+	cpu_set(smp_processor_id(), data.cpus_called);
+	/* trylock, never spin: -1 means another NMI call already holds the lock */
+	if (!spin_trylock(&nmi_call_lock))
+		return -1;
+
+	nmi_call_data = &data;
+	set_nmi_ipi_callback(smp_nmi_callback);
+	mb();	/* order the stores above before the IPI is sent */
+
+	/* Send a message to all other CPUs and wait for them to respond */
+	send_IPI_allbutself(APIC_DM_NMI);
+	while (atomic_read(&data.started) != cpus)
+		cpu_relax();
+
+	unset_nmi_ipi_callback();
+	if (wait)	/* caller asked to wait until func has finished everywhere */
+		while (atomic_read(&data.finished) != cpus)
+			cpu_relax();
+	spin_unlock(&nmi_call_lock);
+
+	return 0;
+}
+
 void smp_stop_cpu(void)
 {
 	unsigned long flags;
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/sys_x86_64.c linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/sys_x86_64.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/sys_x86_64.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/sys_x86_64.c	2007-08-28 17:35:31.000000000 +0400
@@ -148,7 +148,7 @@ asmlinkage long sys_uname(struct new_uts
 {
 	int err;
 	down_read(&uts_sem);
-	err = copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, &ve_utsname, sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32) 
 		err |= copy_to_user(&name->machine, "i686", 5); 		
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/traps.c linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/traps.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/kernel/traps.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/kernel/traps.c	2007-08-28 17:35:36.000000000 +0400
@@ -118,6 +118,9 @@ int printk_address(unsigned long address
 	char *delim = ":"; 
 	char namebuf[128];
 
+	if (!decode_call_traces)
+		return printk("[<%016lx>]", address);
+
 	symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); 
 	if (!symname) 
 		return printk("[<%016lx>]", address);
@@ -210,7 +213,7 @@ void show_trace(unsigned long *stack)
 	do while (cond) { \
 		unsigned long addr = *stack++; \
 		if (kernel_text_address(addr)) { \
-			if (i > 50) { \
+			if (i > 50 && decode_call_traces) { \
 				printk("\n       "); \
 				i = 0; \
 			} \
@@ -292,7 +295,7 @@ void show_stack(struct task_struct *tsk,
 		if (((long) stack & (THREAD_SIZE-1)) == 0)
 			break;
 		}
-		if (i && ((i % 4) == 0))
+		if (i && ((i % 4) == 0) && decode_call_traces)
 			printk("\n       ");
 		printk("%016lx ", *stack++);
 		touch_nmi_watchdog();
@@ -300,6 +303,8 @@ void show_stack(struct task_struct *tsk,
 	show_trace((unsigned long *)rsp);
 }
 
+EXPORT_SYMBOL(show_stack);
+
 /*
  * The architecture-independent dump_stack generator
  */
@@ -321,10 +326,12 @@ void show_registers(struct pt_regs *regs
 
 		rsp = regs->rsp;
 
-	printk("CPU %d ", cpu);
+	printk("CPU: %d ", cpu);
 	__show_regs(regs);
-	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
-		cur->comm, cur->pid, task_thread_info(cur), cur);
+	printk("Process %s (pid: %d, veid=%d, threadinfo %p, task %p)\n",
+		cur->comm, cur->pid,
+		VEID(VE_TASK_INFO(current)->owner_env),
+		task_thread_info(cur), cur);
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
@@ -456,6 +463,7 @@ void die(const char * str, struct pt_reg
 
 	handle_BUG(regs);
 	__die(str, regs, err);
+	check_kernel_csum_bug();
 	oops_end(flags);
 	do_exit(SIGSEGV); 
 }
@@ -475,6 +483,7 @@ void __kprobes die_nmi(char *str, struct
 	if (panic_on_timeout || panic_on_oops)
 		panic("nmi watchdog");
 	dump(str ,regs);
+	smp_nmi_call_function(smp_show_regs, NULL, 1);
 	printk("console shuts up ...\n");
 	oops_end(flags);
 	nmi_exit();
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/lib/Makefile linux-2.6.16.46-0.12-027test011/arch/x86_64/lib/Makefile
--- linux-2.6.16.46-0.12.orig/arch/x86_64/lib/Makefile	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/lib/Makefile	2007-08-28 17:35:30.000000000 +0400
@@ -5,6 +5,7 @@
 CFLAGS_csum-partial.o := -funroll-loops
 
 obj-y := io.o iomap_copy.o
+obj-$(CONFIG_SMP) += cpuid-on-cpu.o msr-on-cpu.o
 
 lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \
 	usercopy.o getuser.o putuser.o  \
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/lib/cpuid-on-cpu.c linux-2.6.16.46-0.12-027test011/arch/x86_64/lib/cpuid-on-cpu.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/lib/cpuid-on-cpu.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/lib/cpuid-on-cpu.c	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1 @@
+#include "../../i386/lib/cpuid-on-cpu.c"
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/lib/msr-on-cpu.c linux-2.6.16.46-0.12-027test011/arch/x86_64/lib/msr-on-cpu.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/lib/msr-on-cpu.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/lib/msr-on-cpu.c	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1 @@
+#include "../../i386/lib/msr-on-cpu.c"
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/mm/fault.c linux-2.6.16.46-0.12-027test011/arch/x86_64/mm/fault.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/mm/fault.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/mm/fault.c	2007-08-28 17:35:31.000000000 +0400
@@ -41,27 +41,6 @@
 #define PF_RSVD	(1<<3)
 #define PF_INSTR	(1<<4)
 
-void bust_spinlocks(int yes)
-{
-	int loglevel_save = console_loglevel;
-	if (yes) {
-		oops_in_progress = 1;
-	} else {
-#ifdef CONFIG_VT
-		unblank_screen();
-#endif
-		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk will give klogd
-		 * a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;		/* NMI oopser may have shut the console up */
-		printk(" ");
-		console_loglevel = loglevel_save;
-	}
-}
-
 /* Sometimes the CPU reports invalid exceptions on prefetch.
    Check that here and ignore.
    Opcode checker based on code by Richard Brunner */
@@ -293,7 +272,7 @@ static int vmalloc_fault(unsigned long a
 }
 
 int page_fault_trace = 0;
-int exception_trace = 1;
+int exception_trace = 0;
 
 /*
  * This routine handles page faults.  It determines the address,
@@ -322,7 +301,7 @@ asmlinkage void __kprobes do_page_fault(
 		local_irq_enable();
 
 	if (unlikely(page_fault_trace))
-		printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+		ve_printk(VE_LOG, "pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
 		       regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); 
 
 	tsk = current;
@@ -372,7 +351,6 @@ asmlinkage void __kprobes do_page_fault(
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
 
- again:
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunatly, in the case of an
@@ -476,7 +454,7 @@ bad_area_nosemaphore:
 			return;
 
 		if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
-			printk(
+			ve_printk(VE_LOG,
 		       "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
 					tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
 					tsk->comm, tsk->pid, address, regs->rip,
@@ -526,8 +504,10 @@ no_context:
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request");
 	printk(" at %016lx RIP: \n" KERN_ALERT,address);
-	printk_address(regs->rip);
-	printk("\n");
+	if (decode_call_traces) {
+		printk_address(regs->rip);
+		printk("\n");
+	}
 	dump_pagetable(address);
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
@@ -544,13 +524,14 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) { 
-		yield();
-		goto again;
-	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & 4)
-		do_exit(SIGKILL);
+	if (error_code & 4) {
+		/*
+		 * A 0-order allocation always succeeds unless something really
+		 * fatal happens: beancounter overdraft or OOM.
+		 */
+		force_sig(SIGKILL, tsk);
+		return;
+	}
 	goto no_context;
 
 do_sigbus:
diff -upr linux-2.6.16.46-0.12.orig/arch/x86_64/mm/init.c linux-2.6.16.46-0.12-027test011/arch/x86_64/mm/init.c
--- linux-2.6.16.46-0.12.orig/arch/x86_64/mm/init.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/arch/x86_64/mm/init.c	2007-08-28 17:35:31.000000000 +0400
@@ -89,6 +89,7 @@ void show_mem(void)
 	printk(KERN_INFO "%lu pages shared\n",shared);
 	printk(KERN_INFO "%lu pages swap cached\n",cached);
 }
+EXPORT_SYMBOL(show_mem);
 
 /* References to section boundaries */
 
diff -upr linux-2.6.16.46-0.12.orig/block/elevator.c linux-2.6.16.46-0.12-027test011/block/elevator.c
--- linux-2.6.16.46-0.12.orig/block/elevator.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/block/elevator.c	2007-08-28 17:35:31.000000000 +0400
@@ -765,12 +765,12 @@ void elv_unregister(struct elevator_type
 	 */
 	if (e->ops.trim) {
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
+		do_each_thread_all(g, p) {
 			task_lock(p);
 			if (p->io_context)
 				e->ops.trim(p->io_context);
 			task_unlock(p);
-		} while_each_thread(g, p);
+		} while_each_thread_all(g, p);
 		read_unlock(&tasklist_lock);
 	}
 
diff -upr linux-2.6.16.46-0.12.orig/block/genhd.c linux-2.6.16.46-0.12-027test011/block/genhd.c
--- linux-2.6.16.46-0.12.orig/block/genhd.c	2007-08-24 19:28:06.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/block/genhd.c	2007-08-28 17:35:31.000000000 +0400
@@ -16,7 +16,8 @@
 #include <linux/kobj_map.h>
 #include <linux/buffer_head.h>
 
-static struct subsystem block_subsys;
+struct subsystem block_subsys;
+EXPORT_SYMBOL(block_subsys);
 
 static DECLARE_MUTEX(block_subsys_sem);
 
@@ -511,7 +512,7 @@ static struct kset_uevent_ops block_ueve
 };
 
 /* declare block_subsys. */
-static decl_subsys(block, &ktype_block, &block_uevent_ops);
+decl_subsys(block, &ktype_block, &block_uevent_ops);
 
 
 /*
diff -upr linux-2.6.16.46-0.12.orig/block/ll_rw_blk.c linux-2.6.16.46-0.12-027test011/block/ll_rw_blk.c
--- linux-2.6.16.46-0.12.orig/block/ll_rw_blk.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/block/ll_rw_blk.c	2007-08-28 17:35:30.000000000 +0400
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
 #include <linux/dump.h>
@@ -3114,10 +3115,12 @@ void submit_bio(int rw, struct bio *bio)
 	BIO_BUG_ON(!bio->bi_size);
 	BIO_BUG_ON(!bio->bi_io_vec);
 	bio->bi_rw |= rw;
-	if (rw & WRITE)
+	if (rw & WRITE) {
 		mod_page_state(pgpgout, count);
-	else
+	} else {
+		task_io_account_read(bio->bi_size);
 		mod_page_state(pgpgin, count);
+	}
 
 	if (unlikely(block_dump)) {
 		char b[BDEVNAME_SIZE];
diff -upr linux-2.6.16.46-0.12.orig/drivers/base/class.c linux-2.6.16.46-0.12-027test011/drivers/base/class.c
--- linux-2.6.16.46-0.12.orig/drivers/base/class.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/drivers/base/class.c	2007-08-28 17:35:31.000000000 +0400
@@ -72,8 +72,13 @@ static struct kobj_type ktype_class = {
 };
 
 /* Hotplug events for classes go to the class_obj subsys */
-static decl_subsys(class, &ktype_class, NULL);
+decl_subsys(class, &ktype_class, NULL);
 
+#ifndef CONFIG_VE
+#define visible_class_subsys class_subsys
+#else	/* CONFIG_VE: use the subsystem reached through get_exec_env() */
+#define visible_class_subsys (*get_exec_env()->class_subsys)
+#endif	/* CONFIG_VE */
 
 int class_create_file(struct class * cls, const struct class_attribute * attr)
 {
@@ -148,7 +153,7 @@ int class_register(struct class * cls)
 	if (error)
 		return error;
 
-	subsys_set_kset(cls, class_subsys);
+	subsys_set_kset(cls, visible_class_subsys);
 
 	error = subsystem_register(&cls->subsys);
 	if (!error) {
@@ -420,8 +425,13 @@ static struct kset_uevent_ops class_ueve
 	.uevent =	class_uevent,
 };
 
-static decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops);
+decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops);
 
+#ifndef CONFIG_VE
+#define visible_class_obj_subsys class_obj_subsys
+#else	/* CONFIG_VE: use the subsystem reached through get_exec_env() */
+#define visible_class_obj_subsys (*get_exec_env()->class_obj_subsys)
+#endif	/* CONFIG_VE */
 
 static int class_device_add_attrs(struct class_device * cd)
 {
@@ -470,7 +480,7 @@ static ssize_t store_uevent(struct class
 
 void class_device_initialize(struct class_device *class_dev)
 {
-	kobj_set_kset_s(class_dev, class_obj_subsys);
+	kobj_set_kset_s(class_dev, visible_class_obj_subsys);
 	kobject_init(&class_dev->kobj);
 	INIT_LIST_HEAD(&class_dev->node);
 }
@@ -805,12 +815,19 @@ void class_interface_unregister(struct c
 	class_put(parent);
 }
 
-
+void prepare_sysfs_classes(void)
+{	/* classes_init() calls this before registering class_subsys */
+#ifdef CONFIG_VE
+	get_ve0()->class_subsys = &class_subsys;	/* the ve from get_ve0() uses the globals */
+	get_ve0()->class_obj_subsys = &class_obj_subsys;
+#endif	/* CONFIG_VE */
+}
 
 int __init classes_init(void)
 {
 	int retval;
 
+	prepare_sysfs_classes();
 	retval = subsystem_register(&class_subsys);
 	if (retval)
 		return retval;
@@ -848,3 +865,6 @@ EXPORT_SYMBOL_GPL(class_device_remove_bi
 
 EXPORT_SYMBOL_GPL(class_interface_register);
 EXPORT_SYMBOL_GPL(class_interface_unregister);
+
+EXPORT_SYMBOL(class_subsys);
+EXPORT_SYMBOL(class_obj_subsys);
diff -upr linux-2.6.16.46-0.12.orig/drivers/block/loop.c linux-2.6.16.46-0.12-027test011/drivers/block/loop.c
--- linux-2.6.16.46-0.12.orig/drivers/block/loop.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/block/loop.c	2007-08-28 17:35:30.000000000 +0400
@@ -585,7 +585,7 @@ static int loop_make_request(request_que
 
 	spin_lock_irq(&lo->lo_lock);
 	if (lo->lo_state != Lo_bound)
-		goto out;
+		goto out_not_bound;
 	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
 		goto out;
 	lo->lo_pending++;
@@ -597,6 +597,7 @@ static int loop_make_request(request_que
 out:
 	if (lo->lo_pending == 0)
 		complete(&lo->lo_bh_done);
+out_not_bound:
 	spin_unlock_irq(&lo->lo_lock);
 	bio_io_error(old_bio, old_bio->bi_size);
 	return 0;
diff -upr linux-2.6.16.46-0.12.orig/drivers/char/pty.c linux-2.6.16.46-0.12-027test011/drivers/char/pty.c
--- linux-2.6.16.46-0.12.orig/drivers/char/pty.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/drivers/char/pty.c	2007-08-28 17:35:31.000000000 +0400
@@ -32,16 +32,30 @@
 #include <linux/bitops.h>
 #include <linux/devpts_fs.h>
 
+#include <ub/ub_misc.h>
+
 /* These are global because they are accessed in tty_io.c */
 #ifdef CONFIG_UNIX98_PTYS
 struct tty_driver *ptm_driver;
-static struct tty_driver *pts_driver;
+struct tty_driver *pts_driver;
+EXPORT_SYMBOL(ptm_driver);
+EXPORT_SYMBOL(pts_driver);
+
+void prepare_pty(void)
+{	/* called at the end of unix98_pty_init() */
+#ifdef CONFIG_VE
+	get_ve0()->ptm_driver = ptm_driver;
+	/* don't clean ptm_driver and co. here, they are used in vecalls.c */
+#endif	/* CONFIG_VE */
+}
 #endif
 
 static void pty_close(struct tty_struct * tty, struct file * filp)
 {
 	if (!tty)
 		return;
+
+	ub_pty_uncharge(tty);
 	if (tty->driver->subtype == PTY_TYPE_MASTER) {
 		if (tty->count > 1)
 			printk("master pty_close: count = %d!!\n", tty->count);
@@ -61,8 +75,12 @@ static void pty_close(struct tty_struct 
 	if (tty->driver->subtype == PTY_TYPE_MASTER) {
 		set_bit(TTY_OTHER_CLOSED, &tty->flags);
 #ifdef CONFIG_UNIX98_PTYS
-		if (tty->driver == ptm_driver)
+		if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+			struct ve_struct *old_env;
+			old_env = set_exec_env(tty->owner_env);
 			devpts_pty_kill(tty->index);
+			(void)set_exec_env(old_env);
+		}
 #endif
 		tty_vhangup(tty->link);
 	}
@@ -212,6 +230,10 @@ static int pty_open(struct tty_struct *t
 	if (tty->link->count != 1)
 		goto out;
 
+	retval = -ENOMEM;
+	if (ub_pty_charge(tty))
+		goto out;
+
 	clear_bit(TTY_OTHER_CLOSED, &tty->link->flags);
 	set_bit(TTY_THROTTLED, &tty->flags);
 	set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
@@ -239,7 +261,9 @@ static struct tty_operations pty_ops = {
 
 /* Traditional BSD devices */
 #ifdef CONFIG_LEGACY_PTYS
-static struct tty_driver *pty_driver, *pty_slave_driver;
+struct tty_driver *pty_driver, *pty_slave_driver;
+EXPORT_SYMBOL(pty_driver);
+EXPORT_SYMBOL(pty_slave_driver);
 
 static int pty_bsd_ioctl(struct tty_struct *tty, struct file *file,
 			 unsigned int cmd, unsigned long arg)
@@ -397,6 +421,7 @@ static void __init unix98_pty_init(void)
 		panic("Couldn't register Unix98 pts driver");
 
 	pty_table[1].data = &ptm_driver->refcount;
+	prepare_pty();
 }
 #else
 static inline void unix98_pty_init(void) { }
diff -upr linux-2.6.16.46-0.12.orig/drivers/char/random.c linux-2.6.16.46-0.12-027test011/drivers/char/random.c
--- linux-2.6.16.46-0.12.orig/drivers/char/random.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/drivers/char/random.c	2007-08-28 17:35:30.000000000 +0400
@@ -691,9 +691,14 @@ static void xfer_secondary_pool(struct e
 
 	if (r->pull && r->entropy_count < nbytes * 8 &&
 	    r->entropy_count < r->poolinfo->POOLBITS) {
-		int bytes = max_t(int, random_read_wakeup_thresh / 8,
-				min_t(int, nbytes, sizeof(tmp)));
+		/* If we're limited, always leave two wakeup worth's BITS */
 		int rsvd = r->limit ? 0 : random_read_wakeup_thresh/4;
+		int bytes = nbytes;
+
+		/* pull at least as many as BYTES as wakeup BITS */
+		bytes = max_t(int, bytes, random_read_wakeup_thresh / 8);
+		/* but never more than the buffer size */
+		bytes = min_t(int, bytes, sizeof(tmp));
 
 		DEBUG_ENT("going to reseed %s with %d bits "
 			  "(%d of %d requested)\n",
@@ -758,7 +763,7 @@ static size_t account(struct entropy_sto
 
 static void extract_buf(struct entropy_store *r, __u8 *out)
 {
-	int i, x;
+	int i;
 	__u32 data[16], buf[5 + SHA_WORKSPACE_WORDS];
 
 	sha_init(buf);
@@ -770,9 +775,11 @@ static void extract_buf(struct entropy_s
 	 * attempts to find previous ouputs), unless the hash
 	 * function can be inverted.
 	 */
-	for (i = 0, x = 0; i < r->poolinfo->poolwords; i += 16, x+=2) {
-		sha_transform(buf, (__u8 *)r->pool+i, buf + 5);
-		add_entropy_words(r, &buf[x % 5], 1);
+	for (i = 0; i < r->poolinfo->poolwords; i += 16) {
+		/* hash blocks of 16 words = 512 bits */
+		sha_transform(buf, (__u8 *)(r->pool + i), buf + 5);
+		/* feed back portion of the resulting hash */
+		add_entropy_words(r, &buf[i % 5], 1);
 	}
 
 	/*
@@ -780,7 +787,7 @@ static void extract_buf(struct entropy_s
 	 * portion of the pool while mixing, and hash one
 	 * final time.
 	 */
-	__add_entropy_words(r, &buf[x % 5], 1, data);
+	__add_entropy_words(r, &buf[i % 5], 1, data);
 	sha_transform(buf, (__u8 *)data, buf + 5);
 
 	/*
@@ -1018,37 +1025,44 @@ random_poll(struct file *file, poll_tabl
 	return mask;
 }
 
-static ssize_t
-random_write(struct file * file, const char __user * buffer,
-	     size_t count, loff_t *ppos)
+static int
+write_pool(struct entropy_store *r, const char __user *buffer, size_t count)
 {
-	int ret = 0;
 	size_t bytes;
 	__u32 buf[16];
 	const char __user *p = buffer;
-	size_t c = count;
 
-	while (c > 0) {
-		bytes = min(c, sizeof(buf));
+	while (count > 0) {
+		bytes = min(count, sizeof(buf));
+		if (copy_from_user(&buf, p, bytes))
+			return -EFAULT;
 
-		bytes -= copy_from_user(&buf, p, bytes);
-		if (!bytes) {
-			ret = -EFAULT;
-			break;
-		}
-		c -= bytes;
+		count -= bytes;
 		p += bytes;
 
-		add_entropy_words(&input_pool, buf, (bytes + 3) / 4);
-	}
-	if (p == buffer) {
-		return (ssize_t)ret;
-	} else {
-		struct inode *inode = file->f_dentry->d_inode;
-	        inode->i_mtime = current_fs_time(inode->i_sb);
-		mark_inode_dirty(inode);
-		return (ssize_t)(p - buffer);
+		add_entropy_words(r, buf, (bytes + 3) / 4);
 	}
+
+	return 0;
+}
+
+/* Mix user data into the blocking and nonblocking pools; updates mtime. */
+static ssize_t random_write(struct file *file, const char __user *buffer,
+			    size_t count, loff_t *ppos)
+{
+	int ret;	/* write_pool() yields 0 or a negative errno */
+	struct inode *inode = file->f_dentry->d_inode;
+
+	ret = write_pool(&blocking_pool, buffer, count);
+	if (ret)
+		return ret;
+	ret = write_pool(&nonblocking_pool, buffer, count);
+	if (ret)
+		return ret;
+
+	inode->i_mtime = current_fs_time(inode->i_sb);
+	mark_inode_dirty(inode);
+	return (ssize_t)count;
+}
 
 static int
@@ -1087,8 +1101,8 @@ random_ioctl(struct inode * inode, struc
 			return -EINVAL;
 		if (get_user(size, p++))
 			return -EFAULT;
-		retval = random_write(file, (const char __user *) p,
-				      size, &file->f_pos);
+		retval = write_pool(&input_pool, (const char __user *)p,
+				    size);
 		if (retval < 0)
 			return retval;
 		credit_entropy_store(&input_pool, ent_count);
diff -upr linux-2.6.16.46-0.12.orig/drivers/char/snsc_event.c linux-2.6.16.46-0.12-027test011/drivers/char/snsc_event.c
--- linux-2.6.16.46-0.12.orig/drivers/char/snsc_event.c	2007-08-24 19:28:15.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/char/snsc_event.c	2007-08-28 17:35:31.000000000 +0400
@@ -223,7 +223,7 @@ scdrv_dispatch_event(char *event, int le
 
 		/* first find init's task */
 		read_lock(&tasklist_lock);
-		for_each_process(p) {
+		for_each_process_all(p) {
 			if (p->pid == 1)
 				break;
 		}
diff -upr linux-2.6.16.46-0.12.orig/drivers/char/sysrq.c linux-2.6.16.46-0.12-027test011/drivers/char/sysrq.c
--- linux-2.6.16.46-0.12.orig/drivers/char/sysrq.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/drivers/char/sysrq.c	2007-08-28 17:35:34.000000000 +0400
@@ -36,6 +36,8 @@
 #include <linux/vt_kern.h>
 #include <linux/workqueue.h>
 #include <linux/kexec.h>
+#include <linux/kallsyms.h>
+#include <linux/slab.h>
 
 #include <asm/ptrace.h>
 
@@ -174,8 +176,13 @@ static struct sysrq_key_op sysrq_showloc
 static void sysrq_handle_showregs(int key, struct pt_regs *pt_regs,
 				  struct tty_struct *tty) 
 {
+	bust_spinlocks(1);
 	if (pt_regs)
 		show_regs(pt_regs);
+	bust_spinlocks(0);
+#if defined(__i386__) || defined(__x86_64__)
+	smp_nmi_call_function(smp_show_regs, NULL, 0);
+#endif
 }
 static struct sysrq_key_op sysrq_showregs_op = {
 	.handler	= sysrq_handle_showregs,
@@ -202,6 +209,7 @@ static void sysrq_handle_showmem(int key
 				 struct tty_struct *tty) 
 {
 	show_mem();
+	show_slab_info();
 }
 static struct sysrq_key_op sysrq_showmem_op = {
 	.handler	= sysrq_handle_showmem,
@@ -221,7 +229,7 @@ static void send_sig_all(int sig)
 {
 	struct task_struct *p;
 
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (p->mm && p->pid != 1)
 			/* Not swapper, init nor kernel thread */
 			force_sig(sig, p);
@@ -272,6 +280,19 @@ static struct sysrq_key_op sysrq_kill_op
 	.enable_mask	= SYSRQ_ENABLE_SIGNAL,
 };
 
+#ifdef CONFIG_SCHED_VCPU
+static void sysrq_handle_vschedstate(int key, struct pt_regs *pt_regs,
+				   struct tty_struct *tty)
+{
+	show_vsched();
+}
+static struct sysrq_key_op sysrq_vschedstate_op = {
+	.handler	= sysrq_handle_vschedstate,
+	.help_msg	= "vsched_stAte",	/* capital letter marks the key: 'a' */
+	.action_msg	= "Show Vsched",
+};
+#endif
+
 /* END SIGNAL SYSRQ HANDLERS BLOCK */
 
 static void sysrq_handle_unrt(int key, struct pt_regs *pt_regs,
@@ -288,8 +309,274 @@ static struct sysrq_key_op sysrq_unrt_op
 
 /* Key Operations table and lock */
 static DEFINE_SPINLOCK(sysrq_key_table_lock);
-#define SYSRQ_KEY_TABLE_LENGTH 36
-static struct sysrq_key_op *sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
+#define SYSRQ_KEY_TABLE_LENGTH 37
+static struct sysrq_key_op **sysrq_key_table;
+static struct sysrq_key_op *sysrq_default_key_table[];
+
+#ifdef CONFIG_SYSRQ_DEBUG
+#define SYSRQ_NAMELEN_MAX	64
+#define SYSRQ_DUMP_LINES	32
+
+static struct sysrq_key_op *sysrq_debug_key_table[];
+static struct sysrq_key_op *sysrq_input_key_table[];
+static unsigned long *dump_address;
+static int orig_console_loglevel;
+static void (*sysrq_input_return)(char *) = NULL;
+
+static void dump_mem(void)
+{
+	unsigned long value[4];
+	mm_segment_t old_fs;
+	int line, err;
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = 0;
+	/* up to SYSRQ_DUMP_LINES lines of 4 words; dump_address advances as we go */
+	for (line = 0; line < SYSRQ_DUMP_LINES; line++) {
+		err |= __get_user(value[0], dump_address++);
+		err |= __get_user(value[1], dump_address++);
+		err |= __get_user(value[2], dump_address++);
+		err |= __get_user(value[3], dump_address++);
+		if (err) {
+			printk("Invalid address %p\n", dump_address - 4);
+			break;
+		}
+#if BITS_PER_LONG == 32
+		printk("0x%p: %08lx %08lx %08lx %08lx\n",
+				dump_address - 4,
+				value[0], value[1], value[2], value[3]);
+#else
+		printk("0x%p: %016lx %016lx %016lx %016lx\n",
+				dump_address - 4,
+				value[0], value[1], value[2], value[3]);
+#endif
+	}
+	set_fs(old_fs);
+}
+
+/* Store val at dump_address, printing old and new value; faults are reported. */
+static void write_mem(unsigned long val)
+{
+	mm_segment_t old_fs = get_fs();
+	unsigned long old_val;
+
+	set_fs(KERNEL_DS);
+	if (__get_user(old_val, dump_address)) {
+		printk("Invalid address %p\n", dump_address);
+		goto out;
+	}
+#if BITS_PER_LONG == 32
+	printk("Changing [%p] from %08lx to %08lx\n",
+			dump_address, old_val, val);
+#else
+	printk("Changing [%p] from %016lx to %016lx\n",
+			dump_address, old_val, val);
+#endif
+	if (__put_user(val, dump_address))	/* a readable word may not be writable */
+		printk("Write to %p failed\n", dump_address);
+out:
+	set_fs(old_fs);
+}
+
+/*
+ * Line editor for sysrq debug input: collects one key per call into a
+ * static buffer and, on Enter or overflow, hands it to sysrq_input_return.
+ */
+static void handle_read(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	static char line[SYSRQ_NAMELEN_MAX];
+	static int len;
+	static int shifted;	/* set by key 0 (taken as Shift), one key only */
+	int accept = 0;
+
+	if (key == 0) {
+		shifted = 1;
+		return;
+	}
+
+	if (key == 0x0d || len == SYSRQ_NAMELEN_MAX - 1) {
+		/* Enter, or buffer full: terminate the line and dispatch it */
+		sysrq_key_table = sysrq_debug_key_table;
+		line[len] = '\0';
+		len = shifted = 0;
+		printk("\n");
+		if (sysrq_input_return != NULL)
+			sysrq_input_return(line);
+		else
+			printk("No return handler!!!\n");
+		return;
+	}
+
+	/* only [a-z0-9-] are accepted; Shift maps them to [A-Z] and '_' */
+	if (key == '-') {
+		accept = 1;
+		if (shifted)
+			key = '_';
+	} else if (key >= 'a' && key <= 'z') {
+		accept = 1;
+		if (shifted)
+			key = key - 'a' + 'A';
+	} else if (key >= '0' && key <= '9') {
+		accept = 1;
+	}
+
+	shifted = 0;
+	if (!accept)
+		return;
+	line[len++] = key;
+	printk("%c", (char)key);
+}
+
+static struct sysrq_key_op input_read = {
+	.handler	= handle_read,
+	.help_msg	= "",
+	.action_msg	= NULL,
+};
+
+static struct sysrq_key_op *sysrq_input_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
+	[0 ... SYSRQ_KEY_TABLE_LENGTH - 1] = &input_read,
+};
+
+/* Parse an address from str and dump up to SYSRQ_DUMP_LINES lines from it. */
+static void return_dump_mem(char *str)
+{
+	unsigned long address;
+	char *end;
+
+	address = simple_strtoul(str, &end, 0);
+	if (end == str || *end != '\0') {	/* no digits, or trailing junk */
+		printk("Bad address [%s]\n", str);
+		return;
+	}
+	dump_address = (unsigned long *)address;
+	dump_mem();
+}
+
+static void handle_dump_mem(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	sysrq_input_return = return_dump_mem;	/* called by handle_read on Enter */
+	sysrq_key_table = sysrq_input_key_table; /* every key now goes to input_read */
+}
+
+static struct sysrq_key_op debug_dump_mem = {
+	.handler	= handle_dump_mem,
+	.help_msg	= "Dump",
+	.action_msg	= "Enter address:",
+};
+
+/* Look up symbol str via kallsyms; on success make it the dump address. */
+static void return_resolve(char *str)
+{
+	unsigned long address = kallsyms_lookup_name(str);
+
+	printk("%s : %lx\n", str, address);
+	if (!address)
+		return;
+	dump_address = (unsigned long *)address;
+	printk("Now you can dump it via X\n");
+}
+
+static void handle_resolve(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	sysrq_input_return = return_resolve;	/* called by handle_read on Enter */
+	sysrq_key_table = sysrq_input_key_table; /* every key now goes to input_read */
+}
+
+static struct sysrq_key_op debug_resolve = {
+	.handler	= handle_resolve,
+	.help_msg	= "Resolve",
+	.action_msg	= "Enter symbol name:",
+};
+
+/* Parse "address-value" from str and store value at that address. */
+static void return_write_mem(char *str)
+{
+	unsigned long address, value;
+	char *end, *val_str;
+
+	address = simple_strtoul(str, &end, 0);
+	if (end == str || *end != '-') {	/* no digits, or no separator */
+		printk("Bad address in %s\n", str);
+		return;
+	}
+	val_str = end + 1;
+	value = simple_strtoul(val_str, &end, 0);
+	if (end == val_str || *end != '\0') {	/* empty value, or trailing junk */
+		printk("Bad value in %s\n", str);
+		return;
+	}
+	dump_address = (unsigned long *)address;
+	write_mem(value);
+}
+
+static void handle_write_mem(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	sysrq_input_return = return_write_mem;	/* called by handle_read on Enter */
+	sysrq_key_table = sysrq_input_key_table; /* every key now goes to input_read */
+}
+
+static struct sysrq_key_op debug_write_mem = {
+	.handler	= handle_write_mem,
+	.help_msg	= "Writemem",
+	.action_msg	= "Enter address-value:",
+};
+
+static void handle_next(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	dump_mem();	/* continues from wherever dump_address currently points */
+}
+
+static struct sysrq_key_op debug_next = {
+	.handler	= handle_next,
+	.help_msg	= "neXt",
+	.action_msg	= "continuing",
+};
+
+static void handle_quit(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	sysrq_key_table = sysrq_default_key_table;	/* leave debug mode */
+	console_loglevel = orig_console_loglevel;	/* saved by sysrq_handle_debug */
+}
+
+static struct sysrq_key_op debug_quit = {
+	.handler	= handle_quit,
+	.help_msg	= "Quit",
+	.action_msg	= "Thank you for using debugger",	/* shown before handler runs */
+};
+
+static struct sysrq_key_op *sysrq_debug_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
+	[13] = &debug_dump_mem,		/* d: index is 10 + (key - 'a') */
+	[26] = &debug_quit,		/* q */
+	[27] = &debug_resolve,		/* r */
+	[32] = &debug_write_mem,	/* w */
+	[33] = &debug_next,		/* x */
+};
+
+static void sysrq_handle_debug(int key, struct pt_regs *pt_regs,
+		struct tty_struct *tty)
+{
+	orig_console_loglevel = console_loglevel;	/* restored by handle_quit */
+	console_loglevel = 8;
+	sysrq_key_table = sysrq_debug_key_table;	/* keys now select debug ops */
+	printk("Welcome sysrq debugging mode\n"
+			"Press H for help\n");
+}
+
+static struct sysrq_key_op sysrq_debug_op = {
+	.handler        = sysrq_handle_debug,
+	.help_msg       = "debuG",
+	.action_msg     = "Select desired action",
+};
+#endif
+
+static struct sysrq_key_op *sysrq_default_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
 /* 0 */	&sysrq_loglevel_op,
 /* 1 */	&sysrq_loglevel_op,
 /* 2 */	&sysrq_loglevel_op,
@@ -300,9 +587,13 @@ static struct sysrq_key_op *sysrq_key_ta
 /* 7 */	&sysrq_loglevel_op,
 /* 8 */	&sysrq_loglevel_op,
 /* 9 */	&sysrq_loglevel_op,
+#ifdef CONFIG_SCHED_VCPU
+/* a */ &sysrq_vschedstate_op,
+#else
 /* a */	NULL, /* Don't use for system provided sysrqs,
 		 it is handled specially on the sparc
 		 and will never arrive */
+#endif
 /* b */	&sysrq_reboot_op,
 #ifdef CONFIG_KEXEC
 /* c */ &sysrq_crashdump_op,
@@ -316,7 +607,11 @@ static struct sysrq_key_op *sysrq_key_ta
 #endif
 /* e */	&sysrq_term_op,
 /* f */	&sysrq_moom_op,
+#ifdef CONFIG_SYSRQ_DEBUG
+/* g */ &sysrq_debug_op,
+#else
 /* g */	NULL,
+#endif
 /* h */	NULL,
 /* i */	&sysrq_kill_op,
 /* j */	NULL,
@@ -344,9 +639,12 @@ static struct sysrq_key_op *sysrq_key_ta
 /* w */	NULL,
 /* x */	NULL,
 /* y */	NULL,
-/* z */	NULL
+/* z */	NULL,
+/* * */	NULL, /* For sysrq debugger */
 };
 
+static struct sysrq_key_op **sysrq_key_table = sysrq_default_key_table;
+
 /* key2index calculation, -1 on invalid index */
 static int sysrq_key_table_key2index(int key) {
 	int retval;
@@ -354,6 +652,10 @@ static int sysrq_key_table_key2index(int
 		retval = key - '0';
 	} else if ((key >= 'a') && (key <= 'z')) {
 		retval = key + 10 - 'a';
+#ifdef CONFIG_SYSRQ_DEBUG
+	} else if (key == 0 || key == 0x0d || key == '-') {
+		retval = SYSRQ_KEY_TABLE_LENGTH - 1;
+#endif
 	} else {
 		retval = -1;
 	}
@@ -397,7 +699,6 @@ void __handle_sysrq(int key, struct pt_r
 	spin_lock_irqsave(&sysrq_key_table_lock, flags);
 	orig_log_level = console_loglevel;
 	console_loglevel = 7;
-	printk(KERN_INFO "SysRq : ");
 
         op_p = __sysrq_get_key_op(key);
         if (op_p) {
@@ -405,14 +706,15 @@ void __handle_sysrq(int key, struct pt_r
 		 * and is the invoked operation enabled? */
 		if (!check_mask || sysrq_enabled == 1 ||
 		    (sysrq_enabled & op_p->enable_mask)) {
-			printk ("%s\n", op_p->action_msg);
+		    	if (op_p->action_msg)
+				printk ("SysRq: %s\n", op_p->action_msg);
 			console_loglevel = orig_log_level;
 			op_p->handler(key, pt_regs, tty);
 		}
 		else
 			printk("This sysrq operation is disabled.\n");
 	} else {
-		printk("HELP : ");
+		printk("SysRq HELP : ");
 		/* Only print the help msg once per handler */
 		for (i=0; i<SYSRQ_KEY_TABLE_LENGTH; i++) 
 		if (sysrq_key_table[i]) {
diff -upr linux-2.6.16.46-0.12.orig/drivers/char/tty_io.c linux-2.6.16.46-0.12-027test011/drivers/char/tty_io.c
--- linux-2.6.16.46-0.12.orig/drivers/char/tty_io.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/char/tty_io.c	2007-08-28 17:35:31.000000000 +0400
@@ -105,6 +105,7 @@
 #include <linux/devfs_fs_kernel.h>
 
 #include <linux/kmod.h>
+#include <ub/ub_mem.h>
 
 #undef TTY_DEBUG_HANGUP
 
@@ -122,11 +123,16 @@ struct termios tty_std_termios = {	/* fo
 
 EXPORT_SYMBOL(tty_std_termios);
 
+/* this lock protects the tty_drivers list; the list users do no locking */
+rwlock_t tty_driver_guard = RW_LOCK_UNLOCKED;
+EXPORT_SYMBOL(tty_driver_guard);
+
 /* This list gets poked at by procfs and various bits of boot up code. This
    could do with some rationalisation such as pulling the tty proc function
    into this file */
    
 LIST_HEAD(tty_drivers);			/* linked list of tty drivers */
+EXPORT_SYMBOL(tty_drivers);
 
 /* Semaphore to protect creating and releasing a tty. This is shared with
    vt.c for deeply disgusting hack reasons */
@@ -138,6 +144,15 @@ int console_use_vt = 1;
 extern struct tty_driver *ptm_driver;	/* Unix98 pty masters; for /dev/ptmx */
 extern int pty_limit;		/* Config limit on Unix98 ptys */
 static DEFINE_IDR(allocated_ptys);
+#ifdef CONFIG_VE
+#define __ve_allocated_ptys(ve) (*((ve)->allocated_ptys))
+#define ve_allocated_ptys	__ve_allocated_ptys(get_exec_env())
+#define ve_ptm_driver		(get_exec_env()->ptm_driver)
+#else
+#define __ve_allocated_ptys(ve) allocated_ptys
+#define ve_allocated_ptys	allocated_ptys
+#define ve_ptm_driver		ptm_driver
+#endif
 static DECLARE_MUTEX(allocated_ptys_lock);
 static int ptmx_open(struct inode *, struct file *);
 #endif
@@ -158,11 +173,22 @@ static int tty_fasync(int fd, struct fil
 static void release_mem(struct tty_struct *tty, int idx);
 
 
+void prepare_tty(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->allocated_ptys = &allocated_ptys;
+	/*
+	 * tty_register_driver() sets owner_env right from boot in this case.
+	 * NOTE(review): allocated_ptys looks CONFIG_UNIX98_PTYS-only - confirm.
+	 */
+#endif
+}
+
 static struct tty_struct *alloc_tty_struct(void)
 {
 	struct tty_struct *tty;
 
-	tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
+	tty = ub_kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
 	if (tty)
 		memset(tty, 0, sizeof(struct tty_struct));
 	return tty;
@@ -859,14 +885,37 @@ static struct tty_driver *get_tty_driver
 {
 	struct tty_driver *p;
 
+	read_lock(&tty_driver_guard);
 	list_for_each_entry(p, &tty_drivers, tty_drivers) {
 		dev_t base = MKDEV(p->major, p->minor_start);
 		if (device < base || device >= base + p->num)
 			continue;
 		*index = device - base;
-		return p;
+#ifdef CONFIG_VE
+		if (in_interrupt())
+			goto found;
+		if (p->major!=PTY_MASTER_MAJOR && p->major!=PTY_SLAVE_MAJOR
+#ifdef CONFIG_UNIX98_PTYS
+		    && (p->major<UNIX98_PTY_MASTER_MAJOR ||
+		    	p->major>UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) &&
+		       (p->major<UNIX98_PTY_SLAVE_MAJOR ||
+		        p->major>UNIX98_PTY_SLAVE_MAJOR+UNIX98_PTY_MAJOR_COUNT-1)
+#endif
+		)
+			goto found;
+		if (ve_is_super(p->owner_env) && ve_is_super(get_exec_env()))
+			goto found;
+		if (!ve_accessible_strict(p->owner_env, get_exec_env()))
+			continue;
+#endif
+		goto found;
 	}
+	read_unlock(&tty_driver_guard);
 	return NULL;
+
+found:
+	read_unlock(&tty_driver_guard);
+	return p;
 }
 
 /*
@@ -1094,7 +1143,7 @@ static void do_tty_hangup(void *data)
 	
 	read_lock(&tasklist_lock);
 	if (tty->session > 0) {
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+		do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 			if (p->signal->tty == tty)
 				p->signal->tty = NULL;
 			if (!p->signal->leader)
@@ -1103,7 +1152,7 @@ static void do_tty_hangup(void *data)
 			send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p);
 			if (tty->pgrp > 0)
 				p->signal->tty_old_pgrp = tty->pgrp;
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 	}
 	read_unlock(&tasklist_lock);
 
@@ -1220,9 +1269,9 @@ void disassociate_ctty(int on_exit)
 
 	/* Now clear signal->tty under the lock */
 	read_lock(&tasklist_lock);
-	do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
+	do_each_task_pid_all(current->signal->session, PIDTYPE_SID, p) {
 		p->signal->tty = NULL;
-	} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
+	} while_each_task_pid_all(current->signal->session, PIDTYPE_SID, p);
 	read_unlock(&tasklist_lock);
 	up(&tty_sem);
 	unlock_kernel();
@@ -1449,20 +1498,27 @@ static inline void tty_line_name(struct 
  * relaxed for the (most common) case of reopening a tty.
  */
 static int init_dev(struct tty_driver *driver, int idx,
-	struct tty_struct **ret_tty)
+	struct tty_struct *i_tty, struct tty_struct **ret_tty)
 {
 	struct tty_struct *tty, *o_tty;
 	struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
 	struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
+	struct ve_struct * owner;
 	int retval=0;
 
-	/* check whether we're reopening an existing tty */
-	if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
-		tty = devpts_get_tty(idx);
-		if (tty && driver->subtype == PTY_TYPE_MASTER)
-			tty = tty->link;
-	} else {
-		tty = driver->ttys[idx];
+	owner = driver->owner_env;
+
+	if (i_tty)
+		tty = i_tty;
+	else {
+		/* check whether we're reopening an existing tty */
+		if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
+			tty = devpts_get_tty(idx);
+			if (tty && driver->subtype == PTY_TYPE_MASTER)
+				tty = tty->link;
+		} else {
+			tty = driver->ttys[idx];
+		}
 	}
 	if (tty) goto fast_track;
 
@@ -1490,6 +1546,7 @@ static int init_dev(struct tty_driver *d
 	tty->driver = driver;
 	tty->index = idx;
 	tty_line_name(driver, idx, tty->name);
+	tty->owner_env = owner;
 
 	if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
 		tp_loc = &tty->termios;
@@ -1500,7 +1557,7 @@ static int init_dev(struct tty_driver *d
 	}
 
 	if (!*tp_loc) {
-		tp = (struct termios *) kmalloc(sizeof(struct termios),
+		tp = (struct termios *) ub_kmalloc(sizeof(struct termios),
 						GFP_KERNEL);
 		if (!tp)
 			goto free_mem_out;
@@ -1508,7 +1565,7 @@ static int init_dev(struct tty_driver *d
 	}
 
 	if (!*ltp_loc) {
-		ltp = (struct termios *) kmalloc(sizeof(struct termios),
+		ltp = (struct termios *) ub_kmalloc(sizeof(struct termios),
 						 GFP_KERNEL);
 		if (!ltp)
 			goto free_mem_out;
@@ -1523,6 +1580,7 @@ static int init_dev(struct tty_driver *d
 		o_tty->driver = driver->other;
 		o_tty->index = idx;
 		tty_line_name(driver->other, idx, o_tty->name);
+		o_tty->owner_env = owner;
 
 		if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
 			o_tp_loc = &o_tty->termios;
@@ -1534,7 +1592,7 @@ static int init_dev(struct tty_driver *d
 
 		if (!*o_tp_loc) {
 			o_tp = (struct termios *)
-				kmalloc(sizeof(struct termios), GFP_KERNEL);
+				ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
 			if (!o_tp)
 				goto free_mem_out;
 			*o_tp = driver->other->init_termios;
@@ -1542,7 +1600,7 @@ static int init_dev(struct tty_driver *d
 
 		if (!*o_ltp_loc) {
 			o_ltp = (struct termios *)
-				kmalloc(sizeof(struct termios), GFP_KERNEL);
+				ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
 			if (!o_ltp)
 				goto free_mem_out;
 			memset(o_ltp, 0, sizeof(struct termios));
@@ -1560,6 +1618,10 @@ static int init_dev(struct tty_driver *d
 			*o_ltp_loc = o_ltp;
 		o_tty->termios = *o_tp_loc;
 		o_tty->termios_locked = *o_ltp_loc;
+#ifdef CONFIG_VE
+		if (driver->other->refcount == 0)
+			(void)get_ve(owner);
+#endif
 		driver->other->refcount++;
 		if (driver->subtype == PTY_TYPE_MASTER)
 			o_tty->count++;
@@ -1584,6 +1646,10 @@ static int init_dev(struct tty_driver *d
 		*ltp_loc = ltp;
 	tty->termios = *tp_loc;
 	tty->termios_locked = *ltp_loc;
+#ifdef CONFIG_VE
+	if (driver->refcount == 0)
+		(void)get_ve(owner);
+#endif
 	driver->refcount++;
 	tty->count++;
 
@@ -1694,6 +1760,8 @@ static void release_mem(struct tty_struc
 		}
 		o_tty->magic = 0;
 		o_tty->driver->refcount--;
+		if (o_tty->driver->refcount == 0)
+			put_ve(o_tty->owner_env);
 		file_list_lock();
 		list_del_init(&o_tty->tty_files);
 		file_list_unlock();
@@ -1716,6 +1784,8 @@ static void release_mem(struct tty_struc
 
 	tty->magic = 0;
 	tty->driver->refcount--;
+	if (tty->driver->refcount == 0)
+		put_ve(tty->owner_env);
 	file_list_lock();
 	list_del_init(&tty->tty_files);
 	file_list_unlock();
@@ -1739,7 +1809,10 @@ static void release_dev(struct file * fi
 	int	idx;
 	char	buf[64];
 	unsigned long flags;
-	
+#ifdef CONFIG_UNIX98_PTYS
+	struct idr *idr_alloced;
+#endif
+
 	tty = (struct tty_struct *)filp->private_data;
 	if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "release_dev"))
 		return;
@@ -1754,6 +1827,9 @@ static void release_dev(struct file * fi
 	devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
 	devpts_master = pty_master && devpts;
 	o_tty = tty->link;
+#ifdef CONFIG_UNIX98_PTYS
+	idr_alloced = &__ve_allocated_ptys(tty->owner_env);
+#endif
 
 #ifdef TTY_PARANOIA_CHECK
 	if (idx < 0 || idx >= tty->driver->num) {
@@ -1926,13 +2002,13 @@ static void release_dev(struct file * fi
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+		do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 			p->signal->tty = NULL;
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 		if (o_tty)
-			do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
+			do_each_task_pid_all(o_tty->session, PIDTYPE_SID, p) {
 				p->signal->tty = NULL;
-			} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
+			} while_each_task_pid_all(o_tty->session, PIDTYPE_SID, p);
 		read_unlock(&tasklist_lock);
 	}
 
@@ -2007,7 +2083,7 @@ static void release_dev(struct file * fi
 	/* Make this pty number available for reallocation */
 	if (devpts) {
 		down(&allocated_ptys_lock);
-		idr_remove(&allocated_ptys, idx);
+		idr_remove(idr_alloced, idx);
 		up(&allocated_ptys_lock);
 	}
 #endif
@@ -2028,7 +2104,7 @@ static void release_dev(struct file * fi
  */
 static int tty_open(struct inode * inode, struct file * filp)
 {
-	struct tty_struct *tty;
+	struct tty_struct *tty, *c_tty;
 	int noctty, retval;
 	struct tty_driver *driver;
 	int index;
@@ -2041,6 +2117,7 @@ retry_open:
 	noctty = filp->f_flags & O_NOCTTY;
 	index  = -1;
 	retval = 0;
+	c_tty = NULL;
 	
 	down(&tty_sem);
 
@@ -2051,6 +2128,7 @@ retry_open:
 		}
 		driver = current->signal->tty->driver;
 		index = current->signal->tty->index;
+		c_tty = current->signal->tty;
 		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
 		/* noctty = 1; */
 		goto got_driver;
@@ -2058,6 +2136,12 @@ retry_open:
 #ifdef CONFIG_VT
 	if (console_use_vt && (device == MKDEV(TTY_MAJOR,0))) {
 		extern struct tty_driver *console_driver;
+#ifdef CONFIG_VE
+		if (!ve_is_super(get_exec_env())) {
+			up(&tty_sem);
+			return -ENODEV;
+		}
+#endif
 		driver = console_driver;
 		index = fg_console;
 		noctty = 1;
@@ -2065,6 +2149,12 @@ retry_open:
 	}
 #endif
 	if (device == MKDEV(TTYAUX_MAJOR,1)) {
+#ifdef CONFIG_VE
+		if (!ve_is_super(get_exec_env())) {
+			up(&tty_sem);
+			return -ENODEV;
+		}
+#endif
 		driver = console_device(&index);
 		if (driver) {
 			/* Don't let /dev/console block */
@@ -2082,7 +2172,7 @@ retry_open:
 		return -ENODEV;
 	}
 got_driver:
-	retval = init_dev(driver, index, &tty);
+	retval = init_dev(driver, index, c_tty, &tty);
 	up(&tty_sem);
 	if (retval)
 		return retval;
@@ -2151,11 +2241,11 @@ static int ptmx_open(struct inode * inod
 
 	/* find a device that is not in use. */
 	down(&allocated_ptys_lock);
-	if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
+	if (!idr_pre_get(&ve_allocated_ptys, GFP_KERNEL)) {
 		up(&allocated_ptys_lock);
 		return -ENOMEM;
 	}
-	idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
+	idr_ret = idr_get_new(&ve_allocated_ptys, NULL, &index);
 	if (idr_ret < 0) {
 		up(&allocated_ptys_lock);
 		if (idr_ret == -EAGAIN)
@@ -2163,14 +2253,14 @@ static int ptmx_open(struct inode * inod
 		return -EIO;
 	}
 	if (index >= pty_limit) {
-		idr_remove(&allocated_ptys, index);
+		idr_remove(&ve_allocated_ptys, index);
 		up(&allocated_ptys_lock);
 		return -EIO;
 	}
 	up(&allocated_ptys_lock);
 
 	down(&tty_sem);
-	retval = init_dev(ptm_driver, index, &tty);
+	retval = init_dev(ve_ptm_driver, index, NULL,  &tty);
 	up(&tty_sem);
 	
 	if (retval)
@@ -2185,14 +2275,15 @@ static int ptmx_open(struct inode * inod
 		goto out1;
 
 	check_tty_count(tty, "tty_open");
-	retval = ptm_driver->open(tty, filp);
+	retval = ve_ptm_driver->open(tty, filp);
 	if (!retval)
 		return 0;
 out1:
 	release_dev(filp);
+	return retval;
 out:
 	down(&allocated_ptys_lock);
-	idr_remove(&allocated_ptys, index);
+	idr_remove(&ve_allocated_ptys, index);
 	up(&allocated_ptys_lock);
 	return retval;
 }
@@ -2305,6 +2396,8 @@ static int tioccons(struct file *file)
 {
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
+	if (!ve_is_super(get_exec_env()))
+		return -EACCES;
 	if (file->f_op->write == redirected_tty_write) {
 		struct file *f;
 		spin_lock(&redirect_lock);
@@ -2365,9 +2458,9 @@ static int tiocsctty(struct tty_struct *
 			 */
 
 			read_lock(&tasklist_lock);
-			do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+			do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
 				p->signal->tty = NULL;
-			} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+			} while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
 			read_unlock(&tasklist_lock);
 		} else
 			return -EPERM;
@@ -2389,7 +2482,7 @@ static int tiocgpgrp(struct tty_struct *
 	 */
 	if (tty == real_tty && current->signal->tty != real_tty)
 		return -ENOTTY;
-	return put_user(real_tty->pgrp, p);
+	return put_user(pid_type_to_vpid(PIDTYPE_PGID, real_tty->pgrp), p);
 }
 
 static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -2409,6 +2502,9 @@ static int tiocspgrp(struct tty_struct *
 		return -EFAULT;
 	if (pgrp < 0)
 		return -EINVAL;
+	pgrp = vpid_to_pid(pgrp);
+	if (pgrp < 0)
+		return -EPERM;
 	if (session_of_pgrp(pgrp) != current->signal->session)
 		return -EPERM;
 	real_tty->pgrp = pgrp;
@@ -2425,7 +2521,7 @@ static int tiocgsid(struct tty_struct *t
 		return -ENOTTY;
 	if (real_tty->session <= 0)
 		return -ENOTTY;
-	return put_user(real_tty->session, p);
+	return put_user(pid_type_to_vpid(PIDTYPE_SID, real_tty->session), p);
 }
 
 static int tiocsetd(struct tty_struct *tty, int __user *p)
@@ -2713,7 +2809,7 @@ static void __do_SAK(void *arg)
 		tty->driver->flush_buffer(tty);
 	
 	read_lock(&tasklist_lock);
-	do_each_task_pid(session, PIDTYPE_SID, p) {
+	do_each_task_pid_all(session, PIDTYPE_SID, p) {
 		if (p->signal->tty == tty || session > 0) {
 			printk(KERN_NOTICE "SAK: killed process %d"
 			    " (%s): p->signal->session==tty->session\n",
@@ -2745,7 +2841,7 @@ static void __do_SAK(void *arg)
 			spin_unlock(&p->files->file_lock);
 		}
 		task_unlock(p);
-	} while_each_task_pid(session, PIDTYPE_SID, p);
+	} while_each_task_pid_all(session, PIDTYPE_SID, p);
 	read_unlock(&tasklist_lock);
 #endif
 }
@@ -3120,8 +3216,11 @@ int tty_register_driver(struct tty_drive
 
 	if (!driver->put_char)
 		driver->put_char = tty_default_put_char;
-	
+
+	driver->owner_env = get_exec_env();
+	write_lock_irq(&tty_driver_guard);
 	list_add(&driver->tty_drivers, &tty_drivers);
+	write_unlock_irq(&tty_driver_guard);
 	
 	if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
 		for(i = 0; i < driver->num; i++)
@@ -3148,7 +3247,9 @@ int tty_unregister_driver(struct tty_dri
 	unregister_chrdev_region(MKDEV(driver->major, driver->minor_start),
 				driver->num);
 
+	write_lock_irq(&tty_driver_guard);
 	list_del(&driver->tty_drivers);
+	write_unlock_irq(&tty_driver_guard);
 
 	/*
 	 * Free the termios and termios_locked structures because
@@ -3274,6 +3375,44 @@ static int __init tty_init(void)
 	vty_init();
  out_vt:
 #endif
+	prepare_tty();
 	return 0;
 }
 module_init(tty_init);
+
+#ifdef CONFIG_UNIX98_PTYS
+/* Create a "tty" class holding one "ptmx" device; returns it or an ERR_PTR. */
+struct class *init_ve_tty_class(void)
+{
+	struct class *cls;
+	struct class_device *ptmx;
+
+	cls = class_create(THIS_MODULE, "tty");
+	if (IS_ERR(cls))
+		return cls;
+
+	ptmx = class_device_create(cls, NULL, MKDEV(TTYAUX_MAJOR, 2),
+				   NULL, "ptmx");
+	if (!IS_ERR(ptmx))
+		return cls;
+
+	class_destroy(cls);
+	return (struct class *)ptmx;	/* hand the error pointer to the caller */
+}
+
+void fini_ve_tty_class(struct class *ve_tty_class)
+{
+	class_device_destroy(ve_tty_class, MKDEV(TTYAUX_MAJOR, 2)); /* the ptmx device */
+	class_destroy(ve_tty_class);
+}
+#else
+struct class *init_ve_tty_class(void)
+{
+	return NULL;
+}
+void fini_ve_tty_class(struct class *ve_tty_class)
+{
+}
+#endif
+EXPORT_SYMBOL(init_ve_tty_class);
+EXPORT_SYMBOL(fini_ve_tty_class);
diff -upr linux-2.6.16.46-0.12.orig/drivers/net/8139too.c linux-2.6.16.46-0.12-027test011/drivers/net/8139too.c
--- linux-2.6.16.46-0.12.orig/drivers/net/8139too.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/net/8139too.c	2007-08-28 17:35:30.000000000 +0400
@@ -1605,7 +1605,7 @@ static void rtl8139_thread (void *_data)
 	if (tp->watchdog_fired) {
 		tp->watchdog_fired = 0;
 		rtl8139_tx_timeout_task(_data);
-	} else if (rtnl_shlock_nowait() == 0) {
+	} else if (rtnl_trylock()) {
 		rtl8139_thread_iter (dev, tp, tp->mmio_addr);
 		rtnl_unlock ();
 	} else {
diff -upr linux-2.6.16.46-0.12.orig/drivers/net/Makefile linux-2.6.16.46-0.12-027test011/drivers/net/Makefile
--- linux-2.6.16.46-0.12.orig/drivers/net/Makefile	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/net/Makefile	2007-08-28 17:35:32.000000000 +0400
@@ -19,6 +19,12 @@ gianfar_driver-objs := gianfar.o \
 		gianfar_mii.o \
 		gianfar_sysfs.o
 
+obj-$(CONFIG_VE_NETDEV) += vznetdev.o
+vznetdev-objs := open_vznet.o venet_core.o
+
+obj-$(CONFIG_VE_ETHDEV) += vzethdev.o
+vzethdev-objs := veth.o
+
 #
 # link order important here
 #
diff -upr linux-2.6.16.46-0.12.orig/drivers/net/loopback.c linux-2.6.16.46-0.12-027test011/drivers/net/loopback.c
--- linux-2.6.16.46-0.12.orig/drivers/net/loopback.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/net/loopback.c	2007-08-28 17:35:33.000000000 +0400
@@ -59,6 +59,13 @@
 #include <linux/percpu.h>
 
 static DEFINE_PER_CPU(struct net_device_stats, loopback_stats);
+#ifdef CONFIG_VE	/* VE0 uses the static per-CPU stats, other VEs their own */
+#define LOOPBACK_STATS(cpu)	((ve_is_super(get_exec_env())) ?	\
+				&per_cpu(loopback_stats, (cpu)) :	\
+				per_cpu_ptr(get_exec_env()->_loopback_stats, (cpu)))
+#else
+#define LOOPBACK_STATS(cpu)	(&per_cpu(loopback_stats, (cpu)))
+#endif /* CONFIG_VE */
 
 #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)
 
@@ -130,6 +137,11 @@ static int loopback_xmit(struct sk_buff 
 {
 	struct net_device_stats *lb_stats;
 
+	if (unlikely(get_exec_env()->disable_net)) {
+		kfree_skb(skb);
+		return 0;
+	}
+
 	skb_orphan(skb);
 
 	skb->protocol = eth_type_trans(skb,dev);
@@ -149,7 +161,7 @@ static int loopback_xmit(struct sk_buff 
 #endif
 	dev->last_rx = jiffies;
 
-	lb_stats = &per_cpu(loopback_stats, get_cpu());
+	lb_stats = LOOPBACK_STATS(get_cpu());
 	lb_stats->rx_bytes += skb->len;
 	lb_stats->tx_bytes = lb_stats->rx_bytes;
 	lb_stats->rx_packets++;
@@ -177,7 +189,7 @@ static struct net_device_stats *get_stat
 
 		if (!cpu_possible(i)) 
 			continue;
-		lb_stats = &per_cpu(loopback_stats, i);
+		lb_stats = LOOPBACK_STATS(i);
 		stats->rx_bytes   += lb_stats->rx_bytes;
 		stats->tx_bytes   += lb_stats->tx_bytes;
 		stats->rx_packets += lb_stats->rx_packets;
@@ -198,6 +210,34 @@ static struct ethtool_ops loopback_ethto
 	.set_tso		= ethtool_op_set_tso,
 };
 
+static void loopback_destructor(struct net_device *dev)
+{
+	kfree(dev->priv);	/* loopback_init() stores the stats block in ->priv */
+	dev->priv = NULL;	/* do not leave a dangling pointer behind */
+}
+
+struct net_device templ_loopback_dev = {	/* NOTE(review): presumably the template for per-VE "lo" devices — confirm */
+	.name	 		= "lo",
+	.mtu			= (16 * 1024) + 20 + 20 + 12,
+	.hard_start_xmit	= loopback_xmit,
+	.hard_header		= eth_header,
+	.hard_header_cache	= eth_header_cache,
+	.header_cache_update	= eth_header_cache_update,
+	.hard_header_len	= ETH_HLEN,	/* 14	*/
+	.addr_len		= ETH_ALEN,	/* 6	*/
+	.tx_queue_len		= 0,
+	.type			= ARPHRD_LOOPBACK,	/* 0x0001*/
+	.rebuild_header		= eth_rebuild_header,
+	.flags			= IFF_LOOPBACK,
+	.features 		= NETIF_F_SG|NETIF_F_FRAGLIST
+				  |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA
+				  |NETIF_F_LLTX|NETIF_F_VIRTUAL,	/* NETIF_F_VIRTUAL comes from this patch */
+};
+
+#ifdef loopback_dev	/* NOTE(review): presumably a header redirects this name per VE — confirm */
+#undef loopback_dev	/* the real object is defined right below */
+#endif
+
 struct net_device loopback_dev = {
 	.name	 		= "lo",
 	.mtu			= (16 * 1024) + 20 + 20 + 12,
@@ -231,9 +271,13 @@ int __init loopback_init(void)
 		memset(stats, 0, sizeof(struct net_device_stats));
 		loopback_dev.priv = stats;
 		loopback_dev.get_stats = &get_stats;
+		loopback_dev.destructor = &loopback_destructor;
 	}
-	
+#ifdef CONFIG_VE
+	get_ve0()->_loopback_dev = &loopback_dev;
+#endif
 	return register_netdev(&loopback_dev);
 };
 
 EXPORT_SYMBOL(loopback_dev);
+EXPORT_SYMBOL(templ_loopback_dev);
diff -upr linux-2.6.16.46-0.12.orig/drivers/net/open_vznet.c linux-2.6.16.46-0.12-027test011/drivers/net/open_vznet.c
--- linux-2.6.16.46-0.12.orig/drivers/net/open_vznet.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/net/open_vznet.c	2007-08-28 17:35:32.000000000 +0400
@@ -0,0 +1,244 @@
+/*
+ *  open_vznet.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Virtual Networking device used to change VE ownership on packets
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+
+#include <linux/inet.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <linux/venet.h>
+
+/* Free every address entry owned by @ve and release its veip descriptor. */
+void veip_stop(struct ve_struct *ve)
+{
+	struct ip_entry_struct *ent, *next;
+
+	write_lock_irq(&veip_hash_lock);
+	if (ve->veip == NULL)
+		goto unlock;
+	list_for_each_entry_safe(ent, next, &ve->veip->ip_lh, ve_list) {
+		ent->active_env = NULL;
+		list_del(&ent->ve_list);
+		list_del(&ent->ip_hash);
+		kfree(ent);
+	}
+	veip_put(ve->veip);
+	ve->veip = NULL;
+	/* balances the __module_get() taken in veip_start() */
+	if (!ve_is_super(ve))
+		module_put(THIS_MODULE);
+unlock:
+	write_unlock_irq(&veip_hash_lock);
+}
+
+/* Bind @ve to its veip descriptor; pins the module on a VE's first bind. */
+int veip_start(struct ve_struct *ve)
+{
+	int first_bind, err = 0;
+
+	write_lock_irq(&veip_hash_lock);
+	first_bind = (ve->veip == NULL);
+	ve->veip = veip_findcreate(ve->veid);
+	if (ve->veip == NULL)
+		err = -ENOMEM;
+	write_unlock_irq(&veip_hash_lock);
+	if (!err && first_bind && !ve_is_super(ve))
+		__module_get(THIS_MODULE);
+	return err;
+}
+
+int veip_entry_add(struct ve_struct *ve, struct ve_addr_struct *addr)
+{
+	struct ip_entry_struct *entry, *found;
+	int err;
+
+	entry = kzalloc(sizeof(struct ip_entry_struct), GFP_KERNEL);
+	if (entry == NULL)
+		return -ENOMEM;
+
+	if (ve->veip == NULL) {
+		/* This can happen if we load venet AFTER ve was started */
+	       	err = veip_start(ve);
+		if (err < 0)
+			goto out;
+	}
+
+	write_lock_irq(&veip_hash_lock);
+	err = -EADDRINUSE;
+	found = venet_entry_lookup(addr);
+	if (found != NULL)
+		goto out_unlock;
+
+	entry->active_env = ve;
+	entry->addr = *addr;
+	ip_entry_hash(entry, ve->veip);
+
+	err = 0;
+	entry = NULL;
+out_unlock:
+	write_unlock_irq(&veip_hash_lock);
+out:
+	if (entry != NULL)
+		kfree(entry);
+	return err;
+}
+
+/* Remove @addr if it is mapped to VE @veid; else return -EADDRNOTAVAIL. */
+int veip_entry_del(envid_t veid, struct ve_addr_struct *addr)
+{
+	struct ip_entry_struct *ent;
+	int ret = -EADDRNOTAVAIL;
+
+	write_lock_irq(&veip_hash_lock);
+	ent = venet_entry_lookup(addr);
+	/* unknown address, or one that belongs to a different VE */
+	if (ent == NULL || ent->active_env->veid != veid)
+		goto out;
+
+	ret = 0;
+	ent->active_env = NULL;
+
+	/* unlink from both the global hash and the VE's list before freeing */
+	list_del(&ent->ip_hash);
+	list_del(&ent->ve_list);
+	kfree(ent);
+out:
+	write_unlock_irq(&veip_hash_lock);
+	return ret;
+}
+
+static int skb_extract_addr(struct sk_buff *skb,
+		struct ve_addr_struct *addr, int dir)
+{
+	switch (skb->protocol) {	/* dir: nonzero = destination, 0 = source */
+	case __constant_htons(ETH_P_IP):
+		addr->family = AF_INET;
+		addr->key[0] = 0;	/* an IPv4 address occupies key[3] only */
+		addr->key[1] = 0;
+		addr->key[2] = 0;
+		addr->key[3] = (dir ? skb->nh.iph->daddr : skb->nh.iph->saddr);
+		return 0;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case __constant_htons(ETH_P_IPV6):
+		addr->family = AF_INET6;
+		memcpy(&addr->key, dir ?
+				skb->nh.ipv6h->daddr.s6_addr32 :
+				skb->nh.ipv6h->saddr.s6_addr32,
+				sizeof(addr->key));
+		return 0;
+#endif
+	}
+
+	return -EAFNOSUPPORT;	/* protocol not handled above */
+}
+
+/* Resolve the VE owning the skb's address (@dir as in skb_extract_addr). */
+static struct ve_struct *venet_find_ve(struct sk_buff *skb, int dir)
+{
+	struct ve_addr_struct key;
+	struct ip_entry_struct *hit;
+
+	if (skb_extract_addr(skb, &key, dir) < 0)
+		return NULL;
+
+	hit = venet_entry_lookup(&key);
+	if (hit == NULL)
+		return NULL;
+	return hit->active_env;
+}
+
+int venet_change_skb_owner(struct sk_buff *skb)	/* 0, -ESRCH or -EACCES */
+{
+	struct ve_struct *ve, *ve_old;
+
+	ve_old = skb->owner_env;
+
+	read_lock(&veip_hash_lock);	/* held across the address lookup */
+	if (!ve_is_super(ve_old)) {
+		/* from VE to host */
+		ve = venet_find_ve(skb, 0);	/* look up by source address */
+		if (ve == NULL)
+			goto out_drop;
+		if (!ve_accessible_strict(ve, ve_old))
+			goto out_source;	/* logged as "source wrong" */
+		skb->owner_env = get_ve0();
+	} else {
+		/* from host to VE */
+		ve = venet_find_ve(skb, 1);	/* look up by destination address */
+		if (ve == NULL)
+			goto out_drop;
+		skb->owner_env = ve;
+	}
+	read_unlock(&veip_hash_lock);
+
+	return 0;
+
+out_drop:
+	read_unlock(&veip_hash_lock);
+	return -ESRCH;	/* address is not mapped to any VE */
+
+out_source:
+	read_unlock(&veip_hash_lock);
+	if (net_ratelimit() && skb->protocol == __constant_htons(ETH_P_IP)) {
+		printk(KERN_WARNING "Dropped packet, source wrong "
+		       "veid=%u src-IP=%u.%u.%u.%u "
+		       "dst-IP=%u.%u.%u.%u\n",
+		       skb->owner_env->veid,
+		       NIPQUAD(skb->nh.iph->saddr),
+		       NIPQUAD(skb->nh.iph->daddr));
+	}
+	return -EACCES;	/* non-IPv4 rejects are not logged */
+}
+
+#ifdef CONFIG_PROC_FS
+int veip_seq_show(struct seq_file *m, void *v)
+{
+	struct list_head *p;
+	struct ip_entry_struct *entry;
+	char s[40];	/* 39 characters of address text plus the NUL */
+
+	p = (struct list_head *)v;
+	if (p == ip_entry_hash_table) {	/* table base marks the header record */
+		seq_puts(m, "Version: 2.5\n");
+		return 0;
+	}
+	entry = list_entry(p, struct ip_entry_struct, ip_hash);
+	veaddr_print(s, sizeof(s), &entry->addr);
+	seq_printf(m, "%39s %10u\n", s, 0);	/* second column is a constant 0 */
+	return 0;
+}
+#endif
+
+__exit void veip_cleanup(void)
+{
+	int i;
+
+	write_lock_irq(&veip_hash_lock);
+	for (i = 0; i < VEIP_HASH_SZ; i++)	/* drain every hash bucket */
+		while (!list_empty(ip_entry_hash_table + i)) {
+			struct ip_entry_struct *entry;
+
+			entry = list_first_entry(ip_entry_hash_table + i,
+					struct ip_entry_struct, ip_hash);
+			list_del(&entry->ip_hash);	/* NOTE(review): ve_list is not unlinked here */
+			kfree(entry);
+		}
+	write_unlock_irq(&veip_hash_lock);
+}
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Virtual Network Device");
+MODULE_LICENSE("GPL v2");
diff -upr linux-2.6.16.46-0.12.orig/drivers/net/tun.c linux-2.6.16.46-0.12-027test011/drivers/net/tun.c
--- linux-2.6.16.46-0.12.orig/drivers/net/tun.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/drivers/net/tun.c	2007-08-28 17:35:32.000000000 +0400
@@ -62,6 +62,7 @@
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <ub/beancounter.h>
 
 #ifdef TUN_DEBUG
 static int debug;
@@ -90,6 +91,9 @@ static int tun_net_close(struct net_devi
 static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
+#if 0
+	struct user_beancounter *ub;
+#endif
 
 	DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
 
@@ -114,6 +118,24 @@ static int tun_net_xmit(struct sk_buff *
 		}
 	}
 
+	/*
+	 * XXX this code is broken:
+	 * See comment in dev_queue_xmit
+	 */
+#if 0
+	ub = netdev_bc(dev)->exec_ub;
+	if (ub && (skb_bc(skb)->charged == 0)) {
+		unsigned long charge;
+		charge = skb_charge_fullsize(skb);
+		if (charge_beancounter(ub, UB_OTHERSOCKBUF, charge, 1))
+			goto drop;
+		get_beancounter(ub);
+		skb_bc(skb)->ub = ub;
+		skb_bc(skb)->charged = charge;
+		skb_bc(skb)->resource = UB_OTHERSOCKBUF;
+	}
+#endif
+
 	/* Queue packet */
 	skb_queue_tail(&tun->readq, skb);
 	dev->trans_start = jiffies;
@@ -410,12 +432,14 @@ static ssize_t tun_chr_readv(struct file
 					tun->dev->name, addr[0], addr[1], addr[2],
 					addr[3], addr[4], addr[5]);
 			ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
+			/* skb will be uncharged in kfree_skb() */
 			kfree_skb(skb);
 			break;
 		} else {
 			DBG(KERN_DEBUG "%s: tun_chr_readv: rejected: %x:%x:%x:%x:%x:%x\n",
 					tun->dev->name, addr[0], addr[1], addr[2],
 					addr[3], addr[4], addr[5]);
+			/* skb will be uncharged in kfree_skb() */
 			kfree_skb(skb);
 			continue;
 		}
@@ -451,6 +475,7 @@ static void tun_setup(struct net_device 
 	dev->get_stats = tun_net_stats;
 	dev->ethtool_ops = &tun_ethtool_ops;
 	dev->destructor = free_netdev;
+	dev->features |= NETIF_F_VIRTUAL;
 }
 
 static struct tun_struct *tun_get_by_name(const char *name)
@@ -459,8 +484,9 @@ static struct tun_struct *tun_get_by_nam
 
 	ASSERT_RTNL();
 	list_for_each_entry(tun, &tun_dev_list, list) {
-		if (!strncmp(tun->dev->name, name, IFNAMSIZ))
-		    return tun;
+		if (ve_accessible_strict(tun->dev->owner_env, get_exec_env()) &&
+		    !strncmp(tun->dev->name, name, IFNAMSIZ))
+			return tun;
 	}
 
 	return NULL;
@@ -479,7 +505,8 @@ static int tun_set_iff(struct file *file
 
 		/* Check permissions */
 		if (tun->owner != -1 &&
-		    current->euid != tun->owner && !capable(CAP_NET_ADMIN))
+		    current->euid != tun->owner &&
+		    !capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 			return -EPERM;
 	} 
 	else if (__dev_get_by_name(ifr->ifr_name)) 
@@ -601,6 +628,9 @@ static int tun_chr_ioctl(struct inode *i
 		break;
 
 	case TUNSETPERSIST:
+		/* prohibit persist mode inside VE */
+		if (!ve_is_super(get_exec_env()))
+			return -EPERM;
 		/* Disable/Enable persist mode */
 		if (arg)
 			tun->flags |= TUN_PERSIST;
diff -upr linux-2.6.16.46-0.12.orig/drivers/net/venet_core.c linux-2.6.16.46-0.12-027test011/drivers/net/venet_core.c
--- linux-2.6.16.46-0.12.orig/drivers/net/venet_core.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/net/venet_core.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,750 @@
+/*
+ *  venet_core.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Common part for Virtuozzo virtual network devices
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/addrconf.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/if_ether.h>	/* For the statistics structure. */
+#include <linux/if_arp.h>	/* For ARPHRD_ETHER */
+#include <linux/venet.h>
+#include <linux/ve_proto.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_venet.h>
+
+struct list_head ip_entry_hash_table[VEIP_HASH_SZ];
+rwlock_t veip_hash_lock = RW_LOCK_UNLOCKED;
+LIST_HEAD(veip_lh);
+
+struct venet_stats {
+	struct net_device_stats	stats;		/* totals rebuilt by get_stats() */
+	struct net_device_stats	*real_stats;	/* per-CPU counters (alloc_percpu) */
+};
+
+/* Per-CPU counter block of @dev for @cpu (dev->priv is a venet_stats). */
+static inline struct net_device_stats *
+venet_stats(struct net_device *dev, int cpu)
+{
+	struct venet_stats *vs = dev->priv;
+	return per_cpu_ptr(vs->real_stats, cpu);
+}
+
+
+#define ip_entry_hash_function(ip)  (ntohl(ip) & (VEIP_HASH_SZ - 1)) /* NOTE(review): mask assumes power-of-two VEIP_HASH_SZ */
+
+void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip)
+{
+	list_add(&entry->ip_hash,	/* global hash; bucket chosen from key[3] */
+			ip_entry_hash_table +
+			ip_entry_hash_function(entry->addr.key[3]));
+	list_add(&entry->ve_list, &veip->ip_lh);	/* owner's address list */
+}
+
+/* Unlink and free @veip once its ip, src and dst lists are all empty. */
+void veip_put(struct veip_struct *veip)
+{
+	/* still referenced by at least one list: keep it */
+	if (!list_empty(&veip->ip_lh) || !list_empty(&veip->src_lh) ||
+	    !list_empty(&veip->dst_lh))
+		return;
+
+	/* NOTE(review): veip_stop() calls this under veip_hash_lock */
+	list_del(&veip->list);
+	kfree(veip);
+}
+
+struct ip_entry_struct *venet_entry_lookup(struct ve_addr_struct *addr)
+{
+	struct ip_entry_struct *entry;
+
+	list_for_each_entry (entry, ip_entry_hash_table +
+			ip_entry_hash_function(addr->key[3]), ip_hash)
+		if (memcmp(&entry->addr, addr, sizeof(*addr)) == 0) /* whole struct */
+			return entry;
+	return NULL;	/* nothing in this bucket matched */
+}
+
+/* Return the veip descriptor registered for @veid, or NULL if none. */
+struct veip_struct *veip_find(envid_t veid)
+{
+	struct veip_struct *cur;
+
+	list_for_each_entry(cur, &veip_lh, list)
+		if (cur->veid == veid)
+			return cur;
+
+	return NULL;
+}
+
+/* Look up the veip for @veid, creating an empty one if absent; NULL on OOM. */
+struct veip_struct *veip_findcreate(envid_t veid)
+{
+	struct veip_struct *ptr;
+
+	ptr = veip_find(veid);
+	if (ptr != NULL)
+		return ptr;
+	/* GFP_ATOMIC: veip_start() calls this under write_lock_irq() */
+	ptr = kzalloc(sizeof(*ptr), GFP_ATOMIC);
+	if (ptr == NULL)
+		return NULL;
+	INIT_LIST_HEAD(&ptr->ip_lh);
+	INIT_LIST_HEAD(&ptr->src_lh);
+	INIT_LIST_HEAD(&ptr->dst_lh);
+	ptr->veid = veid;
+	list_add(&ptr->list, &veip_lh);
+	return ptr;
+}
+
+static int convert_sockaddr(struct sockaddr *addr, int addrlen,
+		struct ve_addr_struct *veaddr)
+{
+	int err;
+
+	switch (addr->sa_family) {	/* read before addrlen is validated */
+	case AF_INET: {
+		struct sockaddr_in *sin;
+
+		err = -EINVAL;
+		if (addrlen != sizeof(struct sockaddr_in))	/* exact size only */
+			break;
+
+		err = 0;
+		sin = (struct sockaddr_in *)addr;
+		veaddr->family = AF_INET;
+		veaddr->key[0] = 0;	/* an IPv4 address occupies key[3] only */
+		veaddr->key[1] = 0;
+		veaddr->key[2] = 0;
+		veaddr->key[3] = sin->sin_addr.s_addr;
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *sin;
+
+		err = -EINVAL;
+		if (addrlen != sizeof(struct sockaddr_in6))	/* exact size only */
+			break;
+
+		err = 0;
+		sin = (struct sockaddr_in6 *)addr;
+		veaddr->family = AF_INET6;
+		memcpy(veaddr->key, &sin->sin6_addr, sizeof(veaddr->key));
+		break;
+	}
+	default:
+		err = -EAFNOSUPPORT;
+	}
+	return err;	/* 0, -EINVAL (bad length) or -EAFNOSUPPORT */
+}
+
+/* Copy a user sockaddr into the kernel and convert it to a ve_addr_struct. */
+int sockaddr_to_veaddr(struct sockaddr __user *uaddr, int addrlen,
+		struct ve_addr_struct *veaddr)
+{
+	/* zeroed: a short copy must not leave stale stack data in sa_family */
+	char addr[MAX_SOCK_ADDR] = { 0 };
+	int err;
+
+	err = move_addr_to_kernel(uaddr, addrlen, addr);
+	if (err < 0)
+		return err;
+
+	return convert_sockaddr((struct sockaddr *)addr, addrlen, veaddr);
+}
+
+void veaddr_print(char *str, int len, struct ve_addr_struct *a)
+{
+	if (a->family == AF_INET)
+		snprintf(str, len, "%u.%u.%u.%u", NIPQUAD(a->key[3]));
+	else	/* any other family: eight 16-bit hex groups, no "::" shortening */
+		snprintf(str, len, "%x:%x:%x:%x:%x:%x:%x:%x",
+				ntohl(a->key[0])>>16, ntohl(a->key[0])&0xFFFF,
+				ntohl(a->key[1])>>16, ntohl(a->key[1])&0xFFFF,
+				ntohl(a->key[2])>>16, ntohl(a->key[2])&0xFFFF,
+				ntohl(a->key[3])>>16, ntohl(a->key[3])&0xFFFF
+			);
+}
+
+/*
+ * Device functions
+ */
+
+static int venet_open(struct net_device *dev)
+{
+	if (!ve_is_super(get_exec_env()) && !try_module_get(THIS_MODULE))
+		return -EBUSY;	/* outside VE0 the module could not be pinned */
+	return 0;
+}
+
+static int venet_close(struct net_device *master)
+{
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);	/* drops the pin taken in venet_open() */
+	return 0;
+}
+
+static void venet_destructor(struct net_device *dev)
+{
+	struct venet_stats *stats = (struct venet_stats *)dev->priv;
+	if (stats == NULL)	/* venet_init_dev() never installed stats */
+		return;
+	free_percpu(stats->real_stats);	/* from alloc_percpu() in venet_init_dev() */
+	kfree(stats);
+	dev->priv = NULL;	/* makes a repeated call harmless */
+}
+
+/*
+ * Re-own the skb and netif_rx() it on the target VE's venet device. Always
+ * returns 0; rejected packets are freed and counted as tx_dropped.
+ * Higher levels make this non-reentrant (called with bh's disabled).
+ */
+static int venet_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats;
+	struct net_device *rcv = NULL;
+	int length;
+
+	stats = venet_stats(dev, smp_processor_id());
+	if (unlikely(get_exec_env()->disable_net))
+		goto outf;
+
+	if (skb->protocol == __constant_htons(ETH_P_IP)) {
+		struct iphdr *iph;
+		iph = skb->nh.iph;
+		if (MULTICAST(iph->daddr))	/* multicast is not forwarded */
+			goto outf;
+	} else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6h;
+		ip6h = skb->nh.ipv6h;
+		if (ipv6_addr_is_multicast(&ip6h->daddr))
+			goto outf;
+		skb_orphan(skb);	/* NOTE(review): only the IPv6 path orphans — confirm */
+	} else {
+		goto outf;	/* only IPv4 and IPv6 are carried */
+	}
+
+	if (venet_change_skb_owner(skb) < 0)
+		goto outf;
+
+	if (unlikely(skb->owner_env->disable_net))	/* new owner has net off */
+		goto outf;
+
+	rcv = skb->owner_env->_venet_dev;
+	if (!rcv)
+		/* VE going down */
+		goto outf;
+
+	dev_hold(rcv);
+
+	if (!(rcv->flags & IFF_UP)) {
+		/* Target VE does not want to receive packets */
+		dev_put(rcv);
+		goto outf;
+	}
+
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = rcv;
+
+	skb->mac.raw = skb->data;
+	memset(skb->data - dev->hard_header_len, 0, dev->hard_header_len);
+
+	dst_release(skb->dst);	/* drop the sender-side route */
+	skb->dst = NULL;
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);	/* and the sender-side conntrack ref */
+	skb->nfct = NULL;
+#endif
+	length = skb->len;	/* saved: the skb is handed off by netif_rx() */
+
+	netif_rx(skb);
+
+	stats->tx_bytes += length;
+	stats->tx_packets++;
+	if (rcv) {	/* always true here; rcv was checked above */
+		struct net_device_stats *rcv_stats;
+
+		rcv_stats = venet_stats(rcv, smp_processor_id());
+		rcv_stats->rx_bytes += length;
+		rcv_stats->rx_packets++;
+		dev_put(rcv);	/* pairs with dev_hold() above */
+	}
+
+	return 0;
+
+outf:
+	kfree_skb(skb);
+	++stats->tx_dropped;
+	return 0;
+}
+
+/* Fold the per-CPU counters of @dev into one net_device_stats snapshot. */
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+	struct venet_stats *stats = dev->priv;
+	int i;
+
+	memset(&stats->stats, 0, sizeof(stats->stats));
+	for (i = 0; i < NR_CPUS; i++) {
+		struct net_device_stats *dev_stats;
+
+		if (!cpu_possible(i))
+			continue;
+		dev_stats = venet_stats(dev, i);
+		stats->stats.rx_bytes   += dev_stats->rx_bytes;
+		stats->stats.tx_bytes   += dev_stats->tx_bytes;
+		stats->stats.rx_packets += dev_stats->rx_packets;
+		stats->stats.tx_packets += dev_stats->tx_packets;
+		stats->stats.tx_dropped += dev_stats->tx_dropped; /* venet_xmit() */
+	}
+	return &stats->stats;
+}
+
+/* Initialize the rest of the venet device; returns 0 or -ENOMEM. */
+int venet_init_dev(struct net_device *dev)
+{
+	struct venet_stats *stats;
+
+	dev->hard_start_xmit = venet_xmit;
+	stats = kzalloc(sizeof(struct venet_stats), GFP_KERNEL);
+	if (stats == NULL)
+		goto fail;
+	stats->real_stats = alloc_percpu(struct net_device_stats);
+	if (stats->real_stats == NULL)
+		goto fail_free;
+	dev->priv = stats;	/* released again by venet_destructor() */
+
+	dev->get_stats = get_stats;
+	dev->open = venet_open;
+	dev->stop = venet_close;
+	dev->destructor = venet_destructor;
+
+	/*
+	 *	Fill in the generic fields of the device structure.
+	 */
+	dev->type		= ARPHRD_VOID;
+	dev->hard_header_len 	= ETH_HLEN;
+	dev->mtu		= 1500; /* eth_mtu */
+	dev->tx_queue_len	= 0;
+
+	memset(dev->broadcast, 0xFF, ETH_ALEN);
+
+	/* New-style flags. */
+	dev->flags		= IFF_BROADCAST|IFF_NOARP|IFF_POINTOPOINT;
+	return 0;
+
+fail_free:
+	kfree(stats);	/* dev->priv was not set yet on this path */
+fail:
+	return -ENOMEM;
+}
+
+static void venet_setup(struct net_device *dev)
+{
+	dev->init = venet_init_dev;
+	/*
+	 * No other features are advertised:
+	 *  - checksumming is required, and nobody else will do that job for us
+	 */
+	dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL | NETIF_F_LLTX;
+}
+
+#ifdef CONFIG_PROC_FS
+static int veinfo_seq_show(struct seq_file *m, void *v)
+{
+	struct ve_struct *ve;
+	struct ip_entry_struct *entry;
+
+	ve = list_entry((struct list_head *)v, struct ve_struct, ve_list);
+
+	seq_printf(m, "%10u %5u %5u", ve->veid,	/* veid, class_id, pcounter */
+                                ve->class_id, atomic_read(&ve->pcounter));
+	read_lock(&veip_hash_lock);	/* held while ve->veip's list is walked */
+	if (ve->veip == NULL)
+		goto unlock;
+	list_for_each_entry (entry, &ve->veip->ip_lh, ve_list) {
+		char addr[40];	/* 39 characters of address text plus the NUL */
+
+		if (entry->active_env == NULL)
+			continue;
+
+		veaddr_print(addr, sizeof(addr), &entry->addr);
+		if (entry->addr.family == AF_INET)
+			seq_printf(m, " %15s", addr);
+		else
+			seq_printf(m, " %39s", addr);
+	}
+unlock:
+	read_unlock(&veip_hash_lock);
+	seq_putc(m, '\n');	/* one line per VE, addresses appended */
+	return 0;
+}
+
+/* seq_file start: takes ve_list_lock, released again by ve_seq_stop(). */
+static void *ve_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct ve_struct *curve;
+	struct list_head *entry;
+	loff_t l;
+
+	curve = get_exec_env();
+	read_lock(&ve_list_lock);
+	if (!ve_is_super(curve)) {
+		/* a VE sees only itself; veinfo_seq_show() expects the node */
+		return *pos == 0 ? &curve->ve_list : NULL;
+	}
+
+	l = *pos;
+	list_for_each(entry, &ve_list_head) {
+		if (l == 0)
+			return entry;
+		l--;
+	}
+	return NULL;
+}
+
+static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *entry;
+
+	entry = (struct list_head *)v;
+	if (!ve_is_super(get_exec_env()))
+		return NULL;	/* inside a VE there is a single record */
+	(*pos)++;
+	return entry->next == &ve_list_head ? NULL : entry->next; /* NULL at end */
+}
+
+static void ve_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_list_lock);	/* taken in ve_seq_start() */
+}
+
+
+static struct seq_operations veinfo_seq_op = {	/* iterator behind vz/veinfo */
+        .start	= ve_seq_start,
+        .next	= ve_seq_next,
+        .stop	= ve_seq_stop,
+        .show	= veinfo_seq_show,
+};
+
+static int veinfo_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &veinfo_seq_op);
+}
+
+static struct file_operations proc_veinfo_operations = {	/* installed by venet_init() */
+        .open		= veinfo_open,
+        .read		= seq_read,
+        .llseek		= seq_lseek,
+        .release	= seq_release,
+};
+
+static void *veip_seq_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t l;
+	struct list_head *p;
+	int i;
+
+	l = *pos;
+	write_lock_irq(&veip_hash_lock);	/* released in veip_seq_stop() */
+	if (l == 0)
+		return ip_entry_hash_table;	/* position 0 is the header record */
+	for (i = 0; i < VEIP_HASH_SZ; i++) {
+		list_for_each(p, ip_entry_hash_table + i) {
+			if (--l == 0)	/* entries are numbered from 1 */
+				return p;
+		}
+	}
+	return NULL;	/* position is past the last entry */
+}
+
+static void *veip_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *p;
+
+	p = (struct list_head *)v;	/* table base (header) or an entry node */
+	while (1) {
+		p = p->next;
+		if (p < ip_entry_hash_table ||
+		    p >= ip_entry_hash_table + VEIP_HASH_SZ) {
+			(*pos)++;	/* p lies outside the head array: a real entry */
+			return p;
+		}
+		if (++p >= ip_entry_hash_table + VEIP_HASH_SZ)	/* next bucket head */
+			return NULL;	/* the chain ended and no bucket is left */
+	}
+	return NULL;
+}
+
+static void veip_seq_stop(struct seq_file *m, void *v)
+{
+	write_unlock_irq(&veip_hash_lock);	/* taken in veip_seq_start() */
+}
+
+static struct seq_operations veip_seq_op = {	/* iterator behind vz/veip */
+        .start	= veip_seq_start,
+        .next	= veip_seq_next,
+        .stop	= veip_seq_stop,
+        .show	= veip_seq_show,
+};
+
+static int veip_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &veip_seq_op);
+}
+
+static struct file_operations proc_veip_operations = {	/* installed by venet_init() */
+        .open		= veip_open,
+        .read		= seq_read,
+        .llseek		= seq_lseek,
+        .release	= seq_release,
+};
+#endif
+
+static int real_ve_ip_map(envid_t veid, int op, struct sockaddr __user *uaddr,
+		int addrlen)
+{
+	int err;
+	struct ve_struct *ve;
+	struct ve_addr_struct addr;
+
+	err = -EPERM;
+	if (!capable(CAP_SETVEID))	/* caller needs CAP_SETVEID */
+		goto out;
+
+	err = sockaddr_to_veaddr(uaddr, addrlen, &addr);
+	if (err < 0)
+		goto out;
+
+	switch (op)
+	{
+		case VE_IP_ADD:
+			ve = get_ve_by_id(veid);	/* reference dropped by put_ve() */
+			err = -ESRCH;
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			if (ve->is_running)	/* otherwise err stays -ESRCH */
+				err = veip_entry_add(ve, &addr);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+
+		case VE_IP_DEL:
+			err = veip_entry_del(veid, &addr);	/* by id; no ve lookup */
+			break;
+		default:
+			err = -EINVAL;
+	}
+
+out:
+	return err;
+}
+
+int venet_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	err = -ENOTTY;	/* returned for every unrecognised command */
+	switch(cmd) {
+	case VENETCTL_VE_IP_MAP: {
+		struct vzctl_ve_ip_map s;
+		err = -EFAULT;
+		if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+			break;
+		err = real_ve_ip_map(s.veid, s.op, s.addr, s.addrlen);
+		break;
+	}
+	}
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+int compat_venet_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	switch(cmd) {
+	case VENETCTL_COMPAT_VE_IP_MAP: {	/* compat layout of the map request */
+		struct compat_vzctl_ve_ip_map cs;
+
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void __user *)arg, sizeof(cs)))
+			break;
+
+		err = real_ve_ip_map(cs.veid, cs.op, compat_ptr(cs.addr),
+				cs.addrlen);
+		break;
+	}
+	default:	/* all other commands go to the native handler */
+		err = venet_ioctl(file, cmd, arg);
+		break;
+	}
+	return err;
+}
+#endif
+
+static struct vzioctlinfo venetcalls = {	/* registered in venet_init() */
+	.type		= VENETCTLTYPE,
+	.ioctl		= venet_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= compat_venet_ioctl,
+#endif /* CONFIG_COMPAT */
+	.owner		= THIS_MODULE,
+};
+
+int venet_dev_start(struct ve_struct *env)	/* 0 or a negative errno */
+{
+	struct net_device *dev_venet;
+	int err;
+
+	dev_venet = alloc_netdev(0, "venet%d", venet_setup);	/* no private area */
+	if (!dev_venet)
+		return -ENOMEM;
+	err = dev_alloc_name(dev_venet, dev_venet->name);
+	if (err<0)
+		goto err;
+	if ((err = register_netdev(dev_venet)) != 0)
+		goto err;
+	env->_venet_dev = dev_venet;	/* published only after registration */
+	return 0;
+err:
+	free_netdev(dev_venet);	/* NOTE(review): stats from venet_init_dev() may leak if registration fails after init — confirm */
+	printk(KERN_ERR "VENET initialization error err=%d\n", err);
+	return err;
+}
+
+static int venet_start(void *data)
+{
+	struct ve_struct *env;
+	int err;
+
+	env = (struct ve_struct *)data;	/* hook argument is the VE */
+	if (env->veip)
+		return -EEXIST;	/* this VE already has a veip attached */
+
+	err = veip_start(env);
+	if (err != 0)
+		return err;
+
+	err = venet_dev_start(env);
+	if (err)
+		goto err_free;
+	return 0;
+
+err_free:
+	veip_stop(env);	/* roll back veip_start() */
+	return err;
+}
+
+static void venet_stop(void *data)
+{
+	struct ve_struct *env;
+	struct net_device *dev;
+
+	env = (struct ve_struct *)data;	/* hook argument is the VE */
+	veip_stop(env);	/* frees the VE's address entries first */
+
+	dev = env->_venet_dev;
+	if (dev == NULL)	/* no device was ever registered */
+		return;
+
+	unregister_netdev(dev);
+	env->_venet_dev = NULL;	/* venet_xmit() treats NULL as "VE going down" */
+	free_netdev(dev);
+}
+
+static struct ve_hook venet_ve_hook = {	/* registered on VE_SS_CHAIN in venet_init() */
+	.init	= venet_start,
+	.fini	= venet_stop,
+	.owner	= THIS_MODULE,
+};
+
+__init int venet_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *de;
+#endif
+	int i, err;
+
+	if (get_ve0()->_venet_dev != NULL)	/* VE0 already has a venet device */
+		return -EEXIST;
+
+	for (i = 0; i < VEIP_HASH_SZ; i++)	/* empty hash before first use */
+		INIT_LIST_HEAD(ip_entry_hash_table + i);
+
+	err = venet_start(get_ve0());	/* veip + device for VE0 */
+	if (err)
+		return err;
+
+#ifdef CONFIG_PROC_FS
+	de = create_proc_glob_entry_mod("vz/veinfo",
+			S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_veinfo_operations;
+	else	/* a missing proc entry only warns; init continues */
+		printk(KERN_WARNING "venet: can't make veinfo proc entry\n");
+
+	de = create_proc_entry_mod("vz/veip", 
+			S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_veip_operations;
+	else
+		printk(KERN_WARNING "venet: can't make veip proc entry\n");
+#endif
+
+	ve_hook_register(VE_SS_CHAIN, &venet_ve_hook);	/* VE init/fini hooks */
+	vzioctl_register(&venetcalls);
+	return 0;
+}
+
+__exit void venet_exit(void)
+{
+	vzioctl_unregister(&venetcalls);	/* reverse order of venet_init() */
+	ve_hook_unregister(&venet_ve_hook);
+
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("vz/veip", NULL);
+	remove_proc_entry("vz/veinfo", NULL);
+#endif
+	venet_stop(get_ve0());	/* VE0's veip and device */
+	veip_cleanup();	/* frees whatever is still in the hash */
+}
+
+module_init(venet_init);
+module_exit(venet_exit);
diff -upr linux-2.6.16.46-0.12.orig/drivers/net/veth.c linux-2.6.16.46-0.12-027test011/drivers/net/veth.c
--- linux-2.6.16.46-0.12.orig/drivers/net/veth.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/net/veth.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,688 @@
+/*
+ *  veth.c
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * Virtual ethernet device used to change VE ownership on packets
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/if_ether.h>	/* For the statistics structure. */
+#include <linux/if_arp.h>	/* For ARPHRD_ETHER */
+#include <linux/ve_proto.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_veth.h>
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/vzcalluser.h>
+#include <linux/nfcalls.h>
+
+struct veth_struct
+{
+	struct net_device_stats stats;
+	struct net_device	*pair;
+	struct list_head	hwaddr_list;
+	struct net_device_stats	*real_stats;
+	int			allow_mac_change;
+};
+
+struct list_head veth_hwaddr_list;
+rwlock_t ve_hwaddr_lock = RW_LOCK_UNLOCKED;
+DECLARE_MUTEX(hwaddr_sem);
+
+#define veth_from_netdev(dev) \
+	((struct veth_struct *)(netdev_priv(dev)))
+#define veth_to_netdev(veth) \
+	((struct net_device*)((char*)(veth) - \
+	(unsigned long)netdev_priv(NULL)))	/* inverse of veth_from_netdev */
+
+static inline struct net_device_stats *
+veth_stats(struct net_device *dev, int cpuid)
+{
+	return per_cpu_ptr(veth_from_netdev(dev)->real_stats, cpuid);
+}
+
+struct net_device * veth_dev_start(char *dev_addr, char *name);
+
+/* Return the listed entry whose peer device is named @name, or NULL. */
+struct veth_struct *hwaddr_entry_lookup(char *name)
+{
+	struct veth_struct *veth;
+
+	list_for_each_entry(veth, &veth_hwaddr_list, hwaddr_list) {
+		BUG_ON(veth->pair == NULL);
+		if (!strncmp(name, veth->pair->name, IFNAMSIZ))
+			return veth;
+	}
+
+	return NULL;
+}
+
+int veth_entry_add(struct ve_struct *ve, char *dev_addr, char *name,
+		char *dev_addr_ve, char *name_ve)
+{
+	struct net_device *dev_ve;
+	struct net_device *dev_ve0;
+	struct ve_struct *old_env;
+	char dev_name[IFNAMSIZ];
+	int err;
+
+	down(&hwaddr_sem);
+	/* First device: an empty name selects the "vz<veid>.%d" template. */
+	if (name[0] == '\0')
+		snprintf(dev_name, sizeof(dev_name), "vz%d.%%d", ve->veid);
+	else {
+		memcpy(dev_name, name, IFNAMSIZ - 1);
+		dev_name[IFNAMSIZ - 1] = '\0';
+	}
+	dev_ve0 = veth_dev_start(dev_addr, dev_name);
+	if (IS_ERR(dev_ve0)) {
+		err = PTR_ERR(dev_ve0);
+		goto err;
+	}
+	/* Second device is created with ve as the execution environment. */
+	old_env = set_exec_env(ve);
+	if (name_ve[0] == '\0')
+		sprintf(dev_name, "eth%%d");
+	else {
+		memcpy(dev_name, name_ve, IFNAMSIZ - 1);
+		dev_name[IFNAMSIZ - 1] = '\0';
+	}
+	dev_ve = veth_dev_start(dev_addr_ve, dev_name);
+	if (IS_ERR(dev_ve)) {
+		err = PTR_ERR(dev_ve);
+		goto err_ve;
+	}
+	set_exec_env(old_env);
+	veth_from_netdev(dev_ve)->pair = dev_ve0;
+	veth_from_netdev(dev_ve0)->pair = dev_ve;
+	/* Only dev_ve's private data is put on the global list. */
+	write_lock(&ve_hwaddr_lock);
+	list_add(&(veth_from_netdev(dev_ve)->hwaddr_list), &veth_hwaddr_list);
+	write_unlock(&ve_hwaddr_lock);
+
+	up(&hwaddr_sem);
+	return 0;
+
+err_ve:
+	set_exec_env(old_env);
+	unregister_netdev(dev_ve0);	/* roll back the first device */
+err:
+	up(&hwaddr_sem);
+	return err;
+}
+
+void veth_pair_del(struct ve_struct *env, struct veth_struct *entry)
+{
+	struct net_device *dev;
+	struct ve_struct *old_env;
+
+	write_lock(&ve_hwaddr_lock);
+	list_del(&entry->hwaddr_list);
+	write_unlock(&ve_hwaddr_lock);
+
+	dev = entry->pair;
+	BUG_ON(entry->pair == NULL);
+	/* Unlink the two halves first; veth_xmit() drops when pair is NULL. */
+	veth_from_netdev(dev)->pair = NULL;
+	entry->pair = NULL;
+	rtnl_lock();
+	old_env = set_exec_env(dev->owner_env);
+	dev_close(dev);
+
+	/*
+	 * Now the device from VE0 does not send or receive anything,
+	 * i.e. dev->hard_start_xmit won't be called.
+	 */
+	set_exec_env(env);
+	unregister_netdevice(veth_to_netdev(entry));
+	set_exec_env(dev->owner_env);
+	unregister_netdevice(dev);
+	set_exec_env(old_env);
+	rtnl_unlock();
+}
+
+/*
+ * Delete the pair that hwaddr_entry_lookup() finds for @name, provided its
+ * listed device is owned by @ve.  Returns 0 on success, -ENODEV otherwise.
+ */
+int veth_entry_del(struct ve_struct *ve, char *name)
+{
+	struct veth_struct *veth;
+	int ret = -ENODEV;
+
+	down(&hwaddr_sem);
+	veth = hwaddr_entry_lookup(name);
+	/* a VE can only remove pairs it owns */
+	if (veth != NULL && veth_to_netdev(veth)->owner_env == ve) {
+		veth_pair_del(ve, veth);
+		ret = 0;
+	}
+	up(&hwaddr_sem);
+
+	return ret;
+}
+
+/*
+ * Set allow_mac_change on the pair that hwaddr_entry_lookup() finds for
+ * @name.  Returns -ESRCH if VE @veid is not found or not running, -ENODEV
+ * if there is no such pair owned by that VE, and 0 on success.
+ * ve->op_sem is held for reading and hwaddr_sem is held for the lookup.
+ */
+int veth_allow_change_mac(envid_t veid, char *name, int allow)
+{
+	struct ve_struct *ve;
+	struct veth_struct *veth;
+	int ret = -ESRCH;
+
+	ve = get_ve_by_id(veid);
+	if (ve == NULL)
+		return ret;
+
+	down_read(&ve->op_sem);
+	if (ve->is_running) {
+		ret = -ENODEV;
+		down(&hwaddr_sem);
+		veth = hwaddr_entry_lookup(name);
+		if (veth != NULL && veth_to_netdev(veth)->owner_env == ve) {
+			veth->allow_mac_change = allow;
+			ret = 0;
+		}
+		up(&hwaddr_sem);
+	}
+	up_read(&ve->op_sem);
+	put_ve(ve);
+
+	return ret;
+}
+
+/*
+ * Device functions
+ */
+
+static int veth_open(struct net_device *dev)
+{
+	return 0;
+}
+
+static int veth_close(struct net_device *master)
+{
+	return 0;
+}
+
+static void veth_destructor(struct net_device *dev)
+{
+	free_percpu(veth_from_netdev(dev)->real_stats);
+	free_netdev(dev);
+}
+
+/* Sum the per-CPU counters into dev's aggregate stats block; returns it. */
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+	int i;
+	struct net_device_stats *stats, *dev_stats;
+
+	stats = &veth_from_netdev(dev)->stats;
+	memset(stats, 0, sizeof(struct net_device_stats));
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_possible(i))
+			continue;
+		dev_stats = veth_stats(dev, i);
+		stats->rx_bytes   += dev_stats->rx_bytes;
+		stats->tx_bytes   += dev_stats->tx_bytes;
+		stats->rx_packets += dev_stats->rx_packets;
+		stats->tx_packets += dev_stats->tx_packets;
+		/* veth_xmit() counts its drops in the per-CPU block too */
+		stats->tx_dropped += dev_stats->tx_dropped;
+	}
+	return stats;
+}
+
+/*
+ * Transmit: filter by MAC, then feed skb to the paired device via netif_rx().
+ * Higher levels make this non-reentrant (it's called with bh's disabled).
+ */
+static int veth_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats;
+	struct net_device *rcv = NULL;
+	struct veth_struct *entry;
+	int length;
+
+	stats = veth_stats(dev, smp_processor_id());
+	if (unlikely(get_exec_env()->disable_net))
+		goto outf;
+
+	entry = veth_from_netdev(dev);
+	rcv = entry->pair;
+	if (!rcv)
+		/* VE going down: veth_pair_del() has cleared the pair */
+		goto outf;
+
+	if (!(rcv->flags & IFF_UP)) {
+		/* Target VE does not want to receive packets */
+		goto outf;
+	}
+
+	if (unlikely(rcv->owner_env->disable_net))
+		goto outf;
+	/* MAC filtering; each branch is bypassed by an allow_mac_change flag */
+	if (ve_is_super(dev->owner_env) &&
+			!veth_from_netdev(rcv)->allow_mac_change) {
+		/* from VE0 to VEX: unicast must match the receiver's MAC */
+		if (ve_is_super(rcv->owner_env))
+			goto out;
+		if (is_multicast_ether_addr(
+					((struct ethhdr *)skb->data)->h_dest))
+			goto out;
+		if (compare_ether_addr(((struct ethhdr *)skb->data)->h_dest,
+					rcv->dev_addr))
+			goto outf;
+	} else if (!ve_is_super(dev->owner_env) &&
+			!entry->allow_mac_change) {
+		/* from VE to VE0: source MAC must be the sender's own */
+		if (compare_ether_addr(((struct ethhdr *)skb->data)->h_source,
+					dev->dev_addr))
+			goto outf;
+	}
+
+out:
+	skb->owner_env = rcv->owner_env;
+
+	skb->dev = rcv;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, rcv);
+
+	if (skb->protocol != __constant_htons(ETH_P_IP))
+		skb_orphan(skb);
+	/* release the route and conntrack references the skb still holds */
+	dst_release(skb->dst);
+	skb->dst = NULL;
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);
+	skb->nfct = NULL;
+#endif
+	length = skb->len;
+	/* length is sampled before netif_rx() is given the skb */
+	netif_rx(skb);
+
+	stats->tx_bytes += length;
+	stats->tx_packets++;
+	if (rcv) {	/* rcv was NULL-checked above */
+		struct net_device_stats *rcv_stats;
+		rcv_stats = veth_stats(rcv, smp_processor_id());
+		rcv_stats->rx_bytes += length;
+		rcv_stats->rx_packets++;
+	}
+
+	return 0;
+
+outf:
+	kfree_skb(skb);
+	stats->tx_dropped++;
+	return 0;
+}
+
+static int veth_set_mac(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	if (!ve_is_super(dev->owner_env) &&
+			!veth_from_netdev(dev)->allow_mac_change)
+		return -EPERM;	/* non-VE0 owner without permission */
+	if (netif_running(dev))
+		return -EBUSY;	/* only while the device is down */
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+
+	return 0;
+}
+
+int veth_init_dev(struct net_device *dev)
+{
+	dev->hard_start_xmit = veth_xmit;
+	dev->get_stats = get_stats;
+	dev->open = veth_open;
+	dev->stop = veth_close;
+	dev->destructor = veth_destructor;
+
+	ether_setup(dev);
+	dev->set_mac_address = veth_set_mac;
+
+	/* drop the change_mtu handler installed by ether_setup() */
+	dev->change_mtu	= NULL;
+
+	dev->tx_queue_len = 0;
+	/* per-CPU counters; released again in veth_destructor() */
+	veth_from_netdev(dev)->real_stats =
+		alloc_percpu(struct net_device_stats);
+	if (veth_from_netdev(dev)->real_stats == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void veth_setup(struct net_device *dev)
+{
+	dev->init = veth_init_dev;
+	/*
+	 * No other features are set: checksumming is required,
+	 * and nobody else will do that job for us.
+	 */
+	dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL | NETIF_F_LLTX;
+}
+
+#ifdef CONFIG_PROC_FS
+#define ADDR_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
+#define ADDR_ARG(x) (x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5]
+static int vehwaddr_seq_show(struct seq_file *m, void *v)
+{
+	struct list_head *p;
+	struct veth_struct *entry;
+
+	p = (struct list_head *)v;
+	if (p == &veth_hwaddr_list) {	/* list head stands for the header */
+		seq_puts(m, "Version: 1.0\n");
+		return 0;
+	}
+	entry = list_entry(p, struct veth_struct, hwaddr_list);
+	seq_printf(m, ADDR_FMT " %16s ",	/* peer device: MAC, name */
+			ADDR_ARG(entry->pair->dev_addr), entry->pair->name);
+	seq_printf(m, ADDR_FMT " %16s %10u %5s\n",	/* listed device */
+			ADDR_ARG(veth_to_netdev(entry)->dev_addr),
+			veth_to_netdev(entry)->name,
+			VEID(veth_to_netdev(entry)->owner_env),
+			entry->allow_mac_change ? "allow" : "deny");
+	return 0;
+}
+
+/* pos 0 yields the list head (version line), pos n the n-th list entry. */
+static void *vehwaddr_seq_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t left = *pos;
+	struct list_head *node;
+
+	/* taken here, dropped in vehwaddr_seq_stop() */
+	read_lock(&ve_hwaddr_lock);
+	if (left == 0)
+		return &veth_hwaddr_list;
+	list_for_each(node, &veth_hwaddr_list)
+		if (--left == 0)
+			return node;
+	return NULL;
+}
+
+static void *vehwaddr_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *p;
+
+	p = (struct list_head *)v;
+	(*pos)++;
+	return p->next == &veth_hwaddr_list ? NULL : p->next;
+}
+
+static void vehwaddr_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_hwaddr_lock);
+}
+
+static struct seq_operations vehwaddr_seq_op = {
+	.start 	= vehwaddr_seq_start,
+	.next	= vehwaddr_seq_next,
+	.stop	= vehwaddr_seq_stop,
+	.show	= vehwaddr_seq_show
+};
+
+static int vehwaddr_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &vehwaddr_seq_op);
+}
+
+static struct file_operations proc_vehwaddr_operations = {
+	.open		= vehwaddr_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release
+};
+#endif
+
+int real_ve_hwaddr(envid_t veid, int op,
+		unsigned char *dev_addr, int addrlen, char *name,
+		unsigned char *dev_addr_ve, int addrlen_ve, char *name_ve)
+{
+	int err;
+	struct ve_struct *ve;
+	char ve_addr[ETH_ALEN];
+
+	err = -EPERM;
+	if (!capable(CAP_NET_ADMIN))
+		goto out;
+	/* op selects: add a pair, delete one, or allow/deny MAC changes */
+	err = -EINVAL;
+	switch (op)
+	{
+		case VE_ETH_ADD:
+			if (addrlen != ETH_ALEN)
+				goto out;
+			if (addrlen_ve != ETH_ALEN && addrlen_ve != 0)
+				goto out;
+			/* No VE MAC given: it becomes dev_addr with byte 3 | 0x80 */
+			if (addrlen_ve == 0 && (dev_addr[3] & 0x80))
+				goto out;
+			if (addrlen_ve == 0) {
+				memcpy(ve_addr, dev_addr, ETH_ALEN);
+				ve_addr[3] |= 0x80;
+			} else {
+				memcpy(ve_addr, dev_addr_ve, ETH_ALEN);
+			}
+
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;	/* kept if the VE is not running */
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			if (ve->is_running)
+				err = veth_entry_add(ve, dev_addr, name,
+						ve_addr, name_ve);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+
+		case VE_ETH_DEL:
+			if (name[0] == '\0')
+				goto out;
+			ve = get_ve_by_id(veid);
+			err = -ESRCH;	/* kept if the VE is not running */
+			if (!ve)
+				goto out;
+
+			down_read(&ve->op_sem);
+			if (ve->is_running)
+				err = veth_entry_del(ve, name);
+			up_read(&ve->op_sem);
+			put_ve(ve);
+			break;
+		case VE_ETH_ALLOW_MAC_CHANGE:
+		case VE_ETH_DENY_MAC_CHANGE:
+			err = veth_allow_change_mac(veid, name,
+					op == VE_ETH_ALLOW_MAC_CHANGE);
+			break;
+	}
+
+out:
+	return err;
+}
+
+int veth_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	err = -ENOTTY;
+	switch(cmd) {
+	    case VETHCTL_VE_HWADDR: {
+			struct vzctl_ve_hwaddr s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err = real_ve_hwaddr(s.veid, s.op,
+					s.dev_addr, s.addrlen, s.dev_name,
+					s.dev_addr_ve, s.addrlen_ve, s.dev_name_ve);
+		}
+		break;
+	}
+	return err;
+}
+
+static struct vzioctlinfo vethcalls = {
+	.type		= VETHCTLTYPE,
+	.ioctl		= veth_ioctl,
+	.compat_ioctl	= veth_ioctl,
+	.owner		= THIS_MODULE,
+};
+
+struct net_device * veth_dev_start(char *dev_addr, char *name)
+{
+	struct net_device *dev;
+	int err;
+
+	if (!is_valid_ether_addr(dev_addr))
+		return ERR_PTR(-EADDRNOTAVAIL);
+
+	dev = alloc_netdev(sizeof(struct veth_struct), name, veth_setup);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+	if (strchr(dev->name, '%')) {
+		err = dev_alloc_name(dev, dev->name);
+		if (err < 0)
+			goto err;
+	}
+	if ((err = register_netdev(dev)) != 0)
+		goto err;
+	memcpy(dev->dev_addr, dev_addr, ETH_ALEN);
+	dev->addr_len = ETH_ALEN;
+	return dev;
+err:	/* registration may fail after ->init allocated real_stats: free it */
+	if (veth_from_netdev(dev)->real_stats != NULL)
+		free_percpu(veth_from_netdev(dev)->real_stats);
+	free_netdev(dev);
+	printk(KERN_ERR "%s initialization error err=%d\n", name, err);
+	return ERR_PTR(err);
+}
+
+static int veth_start(void *data)
+{
+	return 0;
+}
+
+/* VE fini hook: delete every listed pair whose device belongs to this VE. */
+static void veth_stop(void *data)
+{
+	struct ve_struct *env = data;
+	struct veth_struct *veth, *next;
+
+	down(&hwaddr_sem);
+	list_for_each_entry_safe(veth, next, &veth_hwaddr_list, hwaddr_list)
+		if (VEID(veth_to_netdev(veth)->owner_env) == VEID(env))
+			veth_pair_del(env, veth);
+	up(&hwaddr_sem);
+}
+
+static struct ve_hook veth_ve_hook = {
+	.init		= veth_start,
+	.fini		= veth_stop,
+	.owner		= THIS_MODULE,
+};
+
+__init int veth_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *de;
+#endif
+
+	INIT_LIST_HEAD(&veth_hwaddr_list);
+	/* The vz/veth proc entry is optional: a creation failure only warns. */
+#ifdef CONFIG_PROC_FS
+	de = create_proc_entry_mod("vz/veth",
+			S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_vehwaddr_operations;
+	else
+		printk(KERN_WARNING "veth: can't make vehwaddr proc entry\n");
+#endif
+	/* Register veth_start()/veth_stop() as VE hooks, then the ioctls. */
+	ve_hook_register(VE_SS_CHAIN, &veth_ve_hook);
+	vzioctl_register(&vethcalls);
+	KSYMRESOLVE(veth_open);
+	KSYMMODRESOLVE(vzethdev);
+	return 0;
+}
+
+__exit void veth_exit(void)
+{
+	struct veth_struct *entry;
+	struct list_head *tmp, *n;
+	struct ve_struct *ve;
+
+	KSYMMODUNRESOLVE(vzethdev);
+	KSYMUNRESOLVE(veth_open);
+	vzioctl_unregister(&vethcalls);
+	ve_hook_unregister(&veth_ve_hook);
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("vz/veth", NULL);
+#endif
+	/* Delete every pair still on the list, each under its owner VE. */
+	down(&hwaddr_sem);
+	list_for_each_safe(tmp, n, &veth_hwaddr_list) {
+		entry = list_entry(tmp, struct veth_struct, hwaddr_list);
+		ve = get_ve(veth_to_netdev(entry)->owner_env);
+
+		veth_pair_del(ve, entry);
+
+		put_ve(ve);
+	}
+	up(&hwaddr_sem);
+}
+
+module_init(veth_init);
+module_exit(veth_exit);
+
+MODULE_AUTHOR("Andrey Mirkin <amirkin@sw.ru>");
+MODULE_DESCRIPTION("Virtuozzo Virtual Ethernet Device");
+MODULE_LICENSE("GPL v2");
+
diff -upr linux-2.6.16.46-0.12.orig/drivers/pci/probe.c linux-2.6.16.46-0.12-027test011/drivers/pci/probe.c
--- linux-2.6.16.46-0.12.orig/drivers/pci/probe.c	2007-08-24 19:28:20.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/pci/probe.c	2007-08-28 17:35:31.000000000 +0400
@@ -21,6 +21,7 @@ LIST_HEAD(pci_root_buses);
 EXPORT_SYMBOL(pci_root_buses);
 
 LIST_HEAD(pci_devices);
+EXPORT_SYMBOL(pci_devices);
 
 #ifdef HAVE_PCI_LEGACY
 /**
diff -upr linux-2.6.16.46-0.12.orig/drivers/xen/blktap/blktap.c linux-2.6.16.46-0.12-027test011/drivers/xen/blktap/blktap.c
--- linux-2.6.16.46-0.12.orig/drivers/xen/blktap/blktap.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/xen/blktap/blktap.c	2007-08-28 17:35:31.000000000 +0400
@@ -443,7 +443,7 @@ void signal_tapdisk(int idx) 
 		return;
 
 	if (info->pid > 0) {
-		ptask = find_task_by_pid(info->pid);
+		ptask = find_task_by_pid_all(info->pid);
 		if (ptask)
 			info->status = CLEANSHUTDOWN;
 	}
diff -upr linux-2.6.16.46-0.12.orig/drivers/xen/netback/loopback.c linux-2.6.16.46-0.12-027test011/drivers/xen/netback/loopback.c
--- linux-2.6.16.46-0.12.orig/drivers/xen/netback/loopback.c	2007-08-24 19:28:36.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/drivers/xen/netback/loopback.c	2007-08-28 17:35:32.000000000 +0400
@@ -61,7 +61,7 @@ module_param(nloopbacks, int, 0);
 MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
 
 struct net_private {
-	struct net_device *loopback_dev;
+	struct net_device *_loopback_dev;
 	struct net_device_stats stats;
 };
 
@@ -142,7 +142,7 @@ static int loopback_start_xmit(struct sk
 	np->stats.tx_packets++;
 
 	/* Switch to loopback context. */
-	dev = np->loopback_dev;
+	dev = np->_loopback_dev;
 	np  = netdev_priv(dev);
 
 	np->stats.rx_bytes += skb->len;
@@ -201,7 +201,7 @@ static void loopback_construct(struct ne
 {
 	struct net_private *np = netdev_priv(dev);
 
-	np->loopback_dev     = lo;
+	np->_loopback_dev     = lo;
 
 	dev->open            = loopback_open;
 	dev->stop            = loopback_close;
diff -upr linux-2.6.16.46-0.12.orig/fs/Kconfig linux-2.6.16.46-0.12-027test011/fs/Kconfig
--- linux-2.6.16.46-0.12.orig/fs/Kconfig	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/Kconfig	2007-08-28 17:35:33.000000000 +0400
@@ -488,6 +488,38 @@ config QFMT_V2
 	  This quota format allows using quotas with 32-bit UIDs/GIDs. If you
 	  need this functionality say Y here.
 
+config SIM_FS
+	tristate "VPS filesystem"
+	depends on VZ_QUOTA
+	default m
+	help
+	  This file system is a part of Virtuozzo. It introduces a fake
+	  superblock and blockdev to a VE to hide the real device and show
+	  statfs results taken from quota.
+
+config VZ_QUOTA
+	tristate "Virtuozzo Disk Quota support"
+	depends on QUOTA
+	default m
+	help
+	  Virtuozzo Disk Quota imposes disk quota on directories with their
+	  files and subdirectories in total.  Such disk quota is used to
+	  account and limit disk usage by Virtuozzo VPS, but also may be used
+	  separately.
+
+config VZ_QUOTA_UNLOAD
+	bool "Unloadable Virtuozzo Disk Quota module"
+	depends on VZ_QUOTA=m
+	default n
+	help
+	  Make Virtuozzo Disk Quota module unloadable.
+	  Doesn't work reliably now.
+
+config VZ_QUOTA_UGID
+	bool "Per-user and per-group quota in Virtuozzo quota partitions"
+	depends on VZ_QUOTA!=n
+	default y
+
 config QUOTACTL
 	bool
 	depends on XFS_QUOTA || QUOTA
diff -upr linux-2.6.16.46-0.12.orig/fs/Makefile linux-2.6.16.46-0.12-027test011/fs/Makefile
--- linux-2.6.16.46-0.12.orig/fs/Makefile	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/Makefile	2007-08-28 17:35:33.000000000 +0400
@@ -41,11 +41,17 @@ obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
 obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
 obj-$(CONFIG_QUOTACTL)		+= quota.o
+obj-$(CONFIG_VZ_QUOTA)		+= vzdquota.o
+vzdquota-y			+= vzdquot.o vzdq_mgmt.o vzdq_ops.o vzdq_tree.o
+vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_ugid.o
+vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_file.o
 
 obj-$(CONFIG_DMAPI)		+= dmapi/
 
 obj-$(CONFIG_DNOTIFY)		+= dnotify.o
 
+obj-$(CONFIG_SIM_FS)		+= simfs.o
+
 obj-$(CONFIG_PROC_FS)		+= proc/
 obj-y				+= partitions/
 obj-$(CONFIG_SYSFS)		+= sysfs/
diff -upr linux-2.6.16.46-0.12.orig/fs/aio.c linux-2.6.16.46-0.12-027test011/fs/aio.c
--- linux-2.6.16.46-0.12.orig/fs/aio.c	2007-08-24 19:28:32.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/aio.c	2007-08-28 17:35:33.000000000 +0400
@@ -46,13 +46,16 @@
 #endif
 
 /*------ sysctl variables----*/
-static DEFINE_SPINLOCK(aio_nr_lock);
+DEFINE_SPINLOCK(aio_nr_lock);
 unsigned long aio_nr;		/* current system wide number of aio requests */
 unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
+EXPORT_SYMBOL_GPL(aio_nr_lock);
+EXPORT_SYMBOL_GPL(aio_nr);
 /*----end sysctl variables---*/
 
 static kmem_cache_t	*kiocb_cachep;
-static kmem_cache_t	*kioctx_cachep;
+kmem_cache_t	*kioctx_cachep;
+EXPORT_SYMBOL_GPL(kioctx_cachep);
 
 static struct workqueue_struct *aio_wq;
 
@@ -63,7 +66,7 @@ static DECLARE_WORK(fput_work, aio_fput_
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
-static void aio_kick_handler(void *);
+void aio_kick_handler(void *);
 static void aio_queue_work(struct kioctx *);
 
 /* aio_setup
@@ -298,7 +301,7 @@ static void aio_cancel_all(struct kioctx
 	spin_unlock_irq(&ctx->ctx_lock);
 }
 
-static void wait_for_all_aios(struct kioctx *ctx)
+void wait_for_all_aios(struct kioctx *ctx)
 {
 	struct task_struct *tsk = current;
 	DECLARE_WAITQUEUE(wait, tsk);
@@ -315,6 +318,7 @@ static void wait_for_all_aios(struct kio
 	__set_task_state(tsk, TASK_RUNNING);
 	remove_wait_queue(&ctx->wait, &wait);
 }
+EXPORT_SYMBOL_GPL(wait_for_all_aios);
 
 /* wait_on_sync_kiocb:
  *	Waits on the given sync kiocb to complete.
@@ -861,7 +865,7 @@ static inline void aio_run_all_iocbs(str
  *      space.
  * Run on aiod's context.
  */
-static void aio_kick_handler(void *data)
+void aio_kick_handler(void *data)
 {
 	struct kioctx *ctx = data;
 	mm_segment_t oldfs = get_fs();
@@ -880,6 +884,7 @@ static void aio_kick_handler(void *data)
 	if (requeue)
 		queue_work(aio_wq, &ctx->wq);
 }
+EXPORT_SYMBOL_GPL(aio_kick_handler);
 
 
 /*
diff -upr linux-2.6.16.46-0.12.orig/fs/autofs/autofs_i.h linux-2.6.16.46-0.12-027test011/fs/autofs/autofs_i.h
--- linux-2.6.16.46-0.12.orig/fs/autofs/autofs_i.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/autofs/autofs_i.h	2007-08-28 17:35:31.000000000 +0400
@@ -124,7 +124,7 @@ static inline struct autofs_sb_info *aut
    filesystem without "magic".) */
 
 static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
+	return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
 }
 
 /* Hash operations */
diff -upr linux-2.6.16.46-0.12.orig/fs/autofs/init.c linux-2.6.16.46-0.12-027test011/fs/autofs/init.c
--- linux-2.6.16.46-0.12.orig/fs/autofs/init.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/autofs/init.c	2007-08-28 17:35:31.000000000 +0400
@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs
 	.name		= "autofs",
 	.get_sb		= autofs_get_sb,
 	.kill_sb	= kill_anon_super,
+	.fs_flags	= FS_VIRTUALIZED,
 };
 
 static int __init init_autofs_fs(void)
diff -upr linux-2.6.16.46-0.12.orig/fs/autofs/inode.c linux-2.6.16.46-0.12-027test011/fs/autofs/inode.c
--- linux-2.6.16.46-0.12.orig/fs/autofs/inode.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/autofs/inode.c	2007-08-28 17:35:31.000000000 +0400
@@ -66,7 +66,7 @@ static int parse_options(char *options, 
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = virt_pgid(current);
 
 	*minproto = *maxproto = AUTOFS_PROTO_VERSION;
 
@@ -138,7 +138,7 @@ int autofs_fill_super(struct super_block
 	sbi->magic = AUTOFS_SBI_MAGIC;
 	sbi->catatonic = 0;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = process_group(current);
+	sbi->oz_pgrp = virt_pgid(current);
 	autofs_initialize_hash(&sbi->dirhash);
 	sbi->queues = NULL;
 	memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
diff -upr linux-2.6.16.46-0.12.orig/fs/autofs/root.c linux-2.6.16.46-0.12-027test011/fs/autofs/root.c
--- linux-2.6.16.46-0.12.orig/fs/autofs/root.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/autofs/root.c	2007-08-28 17:35:31.000000000 +0400
@@ -354,7 +354,7 @@ static int autofs_root_unlink(struct ino
 
 	/* This allows root to remove symlinks */
 	lock_kernel();
-	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) {
+	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) {
 		unlock_kernel();
 		return -EACCES;
 	}
@@ -541,7 +541,7 @@ static int autofs_root_ioctl(struct inod
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
 		return -ENOTTY;
 	
-	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EPERM;
 	
 	switch(cmd) {
diff -upr linux-2.6.16.46-0.12.orig/fs/autofs4/autofs_i.h linux-2.6.16.46-0.12-027test011/fs/autofs4/autofs_i.h
--- linux-2.6.16.46-0.12.orig/fs/autofs4/autofs_i.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/autofs4/autofs_i.h	2007-08-28 17:35:31.000000000 +0400
@@ -90,6 +90,10 @@ struct autofs_wait_queue {
 
 #define AUTOFS_SBI_MAGIC 0x6d4a556d
 
+/* flags for userspace automount daemon */
+#define AUTOFS_DEAMON_32BIT 0		/* automount is a 32bit process */
+#define _AUTOFS_DEAMON_32BIT		(1 << AUTOFS_DEAMON_32BIT)
+
 struct autofs_sb_info {
 	u32 magic;
 	struct dentry *root;
@@ -105,6 +109,7 @@ struct autofs_sb_info {
 	struct semaphore wq_sem;
 	spinlock_t fs_lock;
 	struct autofs_wait_queue *queues; /* Wait queue pointer */
+	u32 flags; /* flags for userspace automount daemon */
 };
 
 static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
@@ -122,7 +127,7 @@ static inline struct autofs_info *autofs
    filesystem without "magic".) */
 
 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
+	return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
 }
 
 /* Does a dentry have some pending activity? */
diff -upr linux-2.6.16.46-0.12.orig/fs/autofs4/init.c linux-2.6.16.46-0.12-027test011/fs/autofs4/init.c
--- linux-2.6.16.46-0.12.orig/fs/autofs4/init.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/autofs4/init.c	2007-08-28 17:35:31.000000000 +0400
@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs
 	.name		= "autofs",
 	.get_sb		= autofs_get_sb,
 	.kill_sb	= kill_anon_super,
+	.fs_flags	= FS_VIRTUALIZED,
 };
 
 static int __init init_autofs4_fs(void)
diff -upr linux-2.6.16.46-0.12.orig/fs/autofs4/inode.c linux-2.6.16.46-0.12-027test011/fs/autofs4/inode.c
--- linux-2.6.16.46-0.12.orig/fs/autofs4/inode.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/autofs4/inode.c	2007-08-28 17:35:31.000000000 +0400
@@ -179,7 +179,7 @@ static int parse_options(char *options, 
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = virt_pgid(current);
 
 	*minproto = AUTOFS_MIN_PROTO_VERSION;
 	*maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -252,6 +252,7 @@ int autofs4_fill_super(struct super_bloc
 	struct autofs_sb_info *sbi;
 	struct autofs_info *ino;
 	int minproto, maxproto;
+	struct task_struct *tsk = current;
 
 	sbi = (struct autofs_sb_info *) kmalloc(sizeof(*sbi), GFP_KERNEL);
 	if ( !sbi )
@@ -265,10 +266,16 @@ int autofs4_fill_super(struct super_bloc
 	sbi->root = NULL;
 	sbi->catatonic = 0;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = process_group(current);
+	sbi->oz_pgrp = virt_pgid(current);
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
+#ifdef __x86_64__
+	if (tsk->thread_info->flags & _TIF_IA32) {
+		/* mark that automount daemon is 32 bit */
+		sbi->flags |= _AUTOFS_DEAMON_32BIT;
+	}
+#endif
 	init_MUTEX(&sbi->wq_sem);
 	spin_lock_init(&sbi->fs_lock);
 	sbi->queues = NULL;
diff -upr linux-2.6.16.46-0.12.orig/fs/autofs4/root.c linux-2.6.16.46-0.12-027test011/fs/autofs4/root.c
--- linux-2.6.16.46-0.12.orig/fs/autofs4/root.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/autofs4/root.c	2007-08-28 17:35:31.000000000 +0400
@@ -592,7 +592,7 @@ static int autofs4_dir_unlink(struct ino
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	
 	/* This allows root to remove symlinks */
-	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EACCES;
 
 	dput(ino->dentry);
@@ -784,7 +784,7 @@ static int autofs4_root_ioctl(struct ino
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
 		return -ENOTTY;
 	
-	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
+	if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
 		return -EPERM;
 	
 	switch(cmd) {
diff -upr linux-2.6.16.46-0.12.orig/fs/autofs4/waitq.c linux-2.6.16.46-0.12-027test011/fs/autofs4/waitq.c
--- linux-2.6.16.46-0.12.orig/fs/autofs4/waitq.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/autofs4/waitq.c	2007-08-28 17:35:30.000000000 +0400
@@ -99,23 +99,45 @@ static void autofs4_notify_daemon(struct
 	pkt.hdr.proto_version = sbi->version;
 	pkt.hdr.type = type;
 	if (type == autofs_ptype_missing) {
-		struct autofs_packet_missing *mp = &pkt.missing;
+		if (sbi->flags & _AUTOFS_DEAMON_32BIT) {
+			struct autofs_packet_missing_32bit *mp = &pkt.missing_32bit;
 
-		pktsz = sizeof(*mp);
+			pktsz = sizeof(*mp);
 
-		mp->wait_queue_token = wq->wait_queue_token;
-		mp->len = wq->len;
-		memcpy(mp->name, wq->name, wq->len);
-		mp->name[wq->len] = '\0';
+			mp->wait_queue_token = wq->wait_queue_token;
+			mp->len = wq->len;
+			memcpy(mp->name, wq->name, wq->len);
+			mp->name[wq->len] = '\0';
+		} else {
+			struct autofs_packet_missing *mp = &pkt.missing;
+
+			pktsz = sizeof(*mp);
+
+			mp->wait_queue_token = wq->wait_queue_token;
+			mp->len = wq->len;
+			memcpy(mp->name, wq->name, wq->len);
+			mp->name[wq->len] = '\0';
+		}
 	} else if (type == autofs_ptype_expire_multi) {
-		struct autofs_packet_expire_multi *ep = &pkt.expire_multi;
+		if (sbi->flags & _AUTOFS_DEAMON_32BIT) {
+			struct autofs_packet_expire_multi_32bit *ep = &pkt.expire_multi_32bit;
 
-		pktsz = sizeof(*ep);
+			pktsz = sizeof(*ep);
 
-		ep->wait_queue_token = wq->wait_queue_token;
-		ep->len = wq->len;
-		memcpy(ep->name, wq->name, wq->len);
-		ep->name[wq->len] = '\0';
+			ep->wait_queue_token = wq->wait_queue_token;
+			ep->len = wq->len;
+			memcpy(ep->name, wq->name, wq->len);
+			ep->name[wq->len] = '\0';
+		} else {
+			struct autofs_packet_expire_multi *ep = &pkt.expire_multi;
+
+			pktsz = sizeof(*ep);
+
+			ep->wait_queue_token = wq->wait_queue_token;
+			ep->len = wq->len;
+			memcpy(ep->name, wq->name, wq->len);
+			ep->name[wq->len] = '\0';
+		}
 	} else {
 		printk("autofs4_notify_daemon: bad type %d!\n", type);
 		return;
diff -upr linux-2.6.16.46-0.12.orig/fs/binfmt_aout.c linux-2.6.16.46-0.12-027test011/fs/binfmt_aout.c
--- linux-2.6.16.46-0.12.orig/fs/binfmt_aout.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/binfmt_aout.c	2007-08-28 17:35:33.000000000 +0400
@@ -446,9 +446,11 @@ beyond_if:
 #endif
 	start_thread(regs, ex.a_entry, current->mm->start_stack);
 	if (unlikely(current->ptrace & PT_PTRACED)) {
-		if (current->ptrace & PT_TRACE_EXEC)
+		if (current->ptrace & PT_TRACE_EXEC) {
+			set_pn_state(current, PN_STOP_EXEC);
 			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-		else
+			clear_pn_state(current);
+		} else
 			send_sig(SIGTRAP, current, 0);
 	}
 	return 0;
diff -upr linux-2.6.16.46-0.12.orig/fs/binfmt_elf.c linux-2.6.16.46-0.12-027test011/fs/binfmt_elf.c
--- linux-2.6.16.46-0.12.orig/fs/binfmt_elf.c	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/binfmt_elf.c	2007-08-28 17:35:33.000000000 +0400
@@ -361,7 +361,7 @@ static unsigned long load_elf_interp(str
 	eppnt = elf_phdata;
 	for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
 	  if (eppnt->p_type == PT_LOAD) {
-	    int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
+	    int elf_type = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECPRIO;
 	    int elf_prot = 0;
 	    unsigned long vaddr = 0;
 	    unsigned long k, map_addr;
@@ -678,6 +678,15 @@ static int load_elf_binary(struct linux_
 			retval = PTR_ERR(interpreter);
 			if (IS_ERR(interpreter))
 				goto out_free_interp;
+
+			/*
+			 * If the binary is not readable then enforce
+			 * mm->dumpable = 0 regardless of the interpreter's
+			 * permissions.
+			 */
+			if (file_permission(interpreter, MAY_READ) < 0)
+				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+
 			retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
 			if (retval != BINPRM_BUF_SIZE) {
 				if (retval >= 0)
@@ -839,7 +848,7 @@ static int load_elf_binary(struct linux_
 		if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
 		if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
 
-		elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
+		elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE|MAP_EXECPRIO;
 
 		vaddr = elf_ppnt->p_vaddr;
 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
@@ -854,6 +863,8 @@ static int load_elf_binary(struct linux_
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);
 		if (BAD_ADDR(error)) {
 			send_sig(SIGKILL, current, 0);
+			retval = IS_ERR((void *)error) ?
+				PTR_ERR((void*)error) : -EINVAL;
 			goto out_free_dentry;
 		}
 
@@ -881,6 +892,7 @@ static int load_elf_binary(struct linux_
 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
 			/* set_brk can never work.  Avoid overflows.  */
 			send_sig(SIGKILL, current, 0);
+			retval = -EINVAL;
 			goto out_free_dentry;
 		}
 
@@ -1004,9 +1016,11 @@ static int load_elf_binary(struct linux_
 
 	start_thread(regs, elf_entry, bprm->p);
 	if (unlikely(current->ptrace & PT_PTRACED)) {
-		if (current->ptrace & PT_TRACE_EXEC)
+		if (current->ptrace & PT_TRACE_EXEC) {
+			set_pn_state(current, PN_STOP_EXEC);
 			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-		else
+			clear_pn_state(current);
+		} else
 			send_sig(SIGTRAP, current, 0);
 	}
 	retval = 0;
@@ -1026,8 +1040,13 @@ out_free_file:
 	sys_close(elf_exec_fileno);
 out_free_fh:
 	if (files) {
-		put_files_struct(current->files);
+		struct files_struct *old;
+
+		old = current->files;
+		task_lock(current);
 		current->files = files;
+		task_unlock(current);
+		put_files_struct(old);
 	}
 out_free_ph:
 	kfree(elf_phdata);
@@ -1285,10 +1304,10 @@ static void fill_prstatus(struct elf_prs
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
-	prstatus->pr_pid = p->pid;
-	prstatus->pr_ppid = p->parent->pid;
-	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = p->signal->session;
+	prstatus->pr_pid = virt_pid(p);
+	prstatus->pr_ppid = virt_pid(p->parent);
+	prstatus->pr_pgrp = virt_pgid(p);
+	prstatus->pr_sid = virt_sid(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1331,10 +1350,10 @@ static int fill_psinfo(struct elf_prpsin
 			psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
-	psinfo->pr_pid = p->pid;
-	psinfo->pr_ppid = p->parent->pid;
-	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = p->signal->session;
+	psinfo->pr_pid = virt_pid(p);
+	psinfo->pr_ppid = virt_pid(p->parent);
+	psinfo->pr_pgrp = virt_pgid(p);
+	psinfo->pr_sid = virt_sid(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
@@ -1467,7 +1486,7 @@ static int elf_core_dump(long signr, str
 	if (signr) {
 		struct elf_thread_status *tmp;
 		read_lock(&tasklist_lock);
-		do_each_thread(g,p)
+		do_each_thread_ve(g,p)
 			if (current->mm == p->mm && current != p) {
 				tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC);
 				if (!tmp) {
@@ -1479,7 +1498,7 @@ static int elf_core_dump(long signr, str
 				tmp->thread = p;
 				list_add(&tmp->list, &thread_list);
 			}
-		while_each_thread(g,p);
+		while_each_thread_ve(g,p);
 		read_unlock(&tasklist_lock);
 		list_for_each(t, &thread_list) {
 			struct elf_thread_status *tmp;
diff -upr linux-2.6.16.46-0.12.orig/fs/binfmt_elf_fdpic.c linux-2.6.16.46-0.12-027test011/fs/binfmt_elf_fdpic.c
--- linux-2.6.16.46-0.12.orig/fs/binfmt_elf_fdpic.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/binfmt_elf_fdpic.c	2007-08-28 17:35:30.000000000 +0400
@@ -212,6 +212,14 @@ static int load_elf_fdpic_binary(struct 
 				goto error;
 			}
 
+			/*
+			 * If the binary is not readable then enforce
+			 * mm->dumpable = 0 regardless of the interpreter's
+			 * permissions.
+			 */
+			if (file_permission(interpreter, MAY_READ) < 0)
+				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+
 			retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
 			if (retval < 0)
 				goto error;
diff -upr linux-2.6.16.46-0.12.orig/fs/binfmt_misc.c linux-2.6.16.46-0.12-027test011/fs/binfmt_misc.c
--- linux-2.6.16.46-0.12.orig/fs/binfmt_misc.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/binfmt_misc.c	2007-08-28 17:35:29.000000000 +0400
@@ -216,8 +216,13 @@ _error:
 	bprm->interp_data = 0;
 _unshare:
 	if (files) {
-		put_files_struct(current->files);
+		struct files_struct *old;
+
+		old = current->files;
+		task_lock(current);
 		current->files = files;
+		task_unlock(current);
+		put_files_struct(old);
 	}
 	goto _ret;
 }
diff -upr linux-2.6.16.46-0.12.orig/fs/block_dev.c linux-2.6.16.46-0.12-027test011/fs/block_dev.c
--- linux-2.6.16.46-0.12.orig/fs/block_dev.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/block_dev.c	2007-08-28 17:35:31.000000000 +0400
@@ -23,6 +23,7 @@
 #include <linux/mount.h>
 #include <linux/uio.h>
 #include <linux/namei.h>
+#include <linux/ve_proto.h>
 #include <asm/uaccess.h>
 
 struct bdev_inode {
@@ -580,9 +581,15 @@ static int do_open(struct block_device *
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
-	int ret = -ENXIO;
+	int ret;
 	int part;
 
+	ret = get_device_perms_ve(S_IFBLK, bdev->bd_dev,
+				  file->f_mode & (FMODE_READ | FMODE_WRITE));
+	if (ret)
+	        return ret;
+
+	ret = -ENXIO;
 	file->f_mapping = bdev->bd_inode->i_mapping;
 	lock_kernel();
 	disk = get_gendisk(bdev->bd_dev, &part);
@@ -851,7 +858,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
  * namespace if possible and return it.  Return ERR_PTR(error)
  * otherwise.
  */
-struct block_device *lookup_bdev(const char *path)
+struct block_device *lookup_bdev(const char *path, int mode)
 {
 	struct block_device *bdev;
 	struct inode *inode;
@@ -869,6 +876,11 @@ struct block_device *lookup_bdev(const c
 	error = -ENOTBLK;
 	if (!S_ISBLK(inode->i_mode))
 		goto fail;
+
+	error = get_device_perms_ve(S_IFBLK, inode->i_rdev, mode);
+	if (error)
+		goto fail;
+
 	error = -EACCES;
 	if (nd.mnt->mnt_flags & MNT_NODEV)
 		goto fail;
@@ -900,12 +912,13 @@ struct block_device *open_bdev_excl(cons
 	mode_t mode = FMODE_READ;
 	int error = 0;
 
-	bdev = lookup_bdev(path);
+	if (!(flags & MS_RDONLY))
+		mode |= FMODE_WRITE;
+
+	bdev = lookup_bdev(path, mode);
 	if (IS_ERR(bdev))
 		return bdev;
 
-	if (!(flags & MS_RDONLY))
-		mode |= FMODE_WRITE;
 	error = blkdev_get(bdev, mode, 0);
 	if (error)
 		return ERR_PTR(error);
diff -upr linux-2.6.16.46-0.12.orig/fs/buffer.c linux-2.6.16.46-0.12-027test011/fs/buffer.c
--- linux-2.6.16.46-0.12.orig/fs/buffer.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/buffer.c	2007-08-28 17:35:30.000000000 +0400
@@ -36,6 +36,7 @@
 #include <linux/hash.h>
 #include <linux/suspend.h>
 #include <linux/buffer_head.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/bio.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
@@ -43,6 +44,8 @@
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
 
+#include <ub/beancounter.h>
+
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static void invalidate_bh_lrus(void);
 
@@ -292,7 +295,14 @@ static void do_sync(unsigned long wait)
 
 asmlinkage long sys_sync(void)
 {
+	struct user_beancounter *ub;
+
+	ub = get_exec_ub();
+	ub_percpu_inc(ub, sync);
+
 	do_sync(1);
+
+	ub_percpu_inc(ub, sync_done);
 	return 0;
 }
 
@@ -335,6 +345,7 @@ static long do_fsync(unsigned int fd, in
 	struct file * file;
 	struct address_space *mapping;
 	int ret, err;
+	struct user_beancounter *ub;
 
 	ret = -EBADF;
 	file = fget(fd);
@@ -349,6 +360,12 @@ static long do_fsync(unsigned int fd, in
 
 	mapping = file->f_mapping;
 
+	ub = get_exec_ub();
+	if (datasync)
+		ub_percpu_inc(ub, fdsync);
+	else
+		ub_percpu_inc(ub, fsync);
+
 	ret = filemap_fdatawrite(mapping);
 
 	/*
@@ -364,6 +381,10 @@ static long do_fsync(unsigned int fd, in
 	if (!ret)
 		ret = err;
 
+	if (datasync)
+		ub_percpu_inc(ub, fdsync_done);
+	else
+		ub_percpu_inc(ub, fsync_done);
 out_putf:
 	fput(file);
 out:
@@ -846,6 +867,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  */
 int __set_page_dirty_buffers(struct page *page)
 {
+	int acct;
 	struct address_space * const mapping = page_mapping(page);
 
 	if (unlikely(!mapping))
@@ -865,15 +887,20 @@ int __set_page_dirty_buffers(struct page
 
 	if (!TestSetPageDirty(page)) {
 		write_lock_irq(&mapping->tree_lock);
+		acct = 0;
 		if (page->mapping) {	/* Race with truncate? */
-			if (mapping_cap_account_dirty(mapping))
+			if (mapping_cap_account_dirty(mapping)) {
 				inc_page_state(nr_dirty);
+				acct = 1;
+			}
 			radix_tree_tag_set(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
 		}
 		write_unlock_irq(&mapping->tree_lock);
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+		if (acct)
+			task_io_account_write(page, PAGE_CACHE_SIZE, 0);
 	}
 	
 	return 0;
@@ -2894,6 +2921,9 @@ void ll_rw_block(int rw, int nr, struct 
 {
 	int i;
 
+	if (likely(nr) && !(rw & WRITE))
+		task_io_account_read(nr * bhs[0]->b_size);
+
 	for (i = 0; i < nr; i++) {
 		struct buffer_head *bh = bhs[i];
 
@@ -3029,8 +3059,13 @@ int try_to_free_buffers(struct page *pag
 		 * could encounter a non-uptodate page, which is unresolvable.
 		 * This only applies in the rare case where try_to_free_buffers
 		 * succeeds but the page is not freed.
+		 *
+		 * Also, during truncate, discard_buffer will have marked all
+		 * the page's buffers clean.  We discover that here and clean
+		 * the page also.
 		 */
-		clear_page_dirty(page);
+		if (test_clear_page_dirty(page))
+			task_io_account_cancelled_write(PAGE_CACHE_SIZE);
 	}
 	spin_unlock(&mapping->private_lock);
 out:
diff -upr linux-2.6.16.46-0.12.orig/fs/char_dev.c linux-2.6.16.46-0.12-027test011/fs/char_dev.c
--- linux-2.6.16.46-0.12.orig/fs/char_dev.c	2007-08-24 19:28:06.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/char_dev.c	2007-08-28 17:35:31.000000000 +0400
@@ -21,6 +21,8 @@
 #include <linux/kobj_map.h>
 #include <linux/cdev.h>
 
+#include <linux/ve_proto.h>
+
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
@@ -277,6 +279,11 @@ int chrdev_open(struct inode * inode, st
 	struct cdev *new = NULL;
 	int ret = 0;
 
+	ret = get_device_perms_ve(S_IFCHR, inode->i_rdev,
+				  filp->f_mode & (FMODE_READ | FMODE_WRITE));
+	if (ret)
+		return ret;
+
 	spin_lock(&cdev_lock);
 	p = inode->i_cdev;
 	if (!p) {
diff -upr linux-2.6.16.46-0.12.orig/fs/cifs/file.c linux-2.6.16.46-0.12-027test011/fs/cifs/file.c
--- linux-2.6.16.46-0.12.orig/fs/cifs/file.c	2007-08-24 19:28:29.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/cifs/file.c	2007-08-28 17:35:30.000000000 +0400
@@ -29,6 +29,7 @@
 #include <linux/pagevec.h>
 #include <linux/smp_lock.h>
 #include <linux/writeback.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/delay.h>
 #include <asm/div64.h>
 #include "cifsfs.h"
@@ -1786,6 +1787,7 @@ static int cifs_readpages(struct file *f
 			}
 			break;
 		} else if (bytes_read > 0) {
+			task_io_account_read(bytes_read);
 			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
 			cifs_copy_cache_pages(mapping, page_list, bytes_read,
 				smb_read_data + 4 /* RFC1001 hdr */ +
diff -upr linux-2.6.16.46-0.12.orig/fs/compat.c linux-2.6.16.46-0.12-027test011/fs/compat.c
--- linux-2.6.16.46-0.12.orig/fs/compat.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/compat.c	2007-08-28 17:35:33.000000000 +0400
@@ -25,6 +25,7 @@
 #include <linux/file.h>
 #include <linux/vfs.h>
 #include <linux/ioctl32.h>
+#include <linux/virtinfo.h>
 #include <linux/ioctl.h>
 #include <linux/init.h>
 #include <linux/sockios.h>	/* for SIOCDEVPRIVATE */
@@ -46,6 +47,7 @@
 #include <linux/rwsem.h>
 #include <linux/tsacct_kern.h>
 #include <linux/mm.h>
+#include <linux/quota.h>
 
 #include <net/sock.h>		/* siocdevprivate_ioctl */
 
@@ -70,7 +72,7 @@ asmlinkage long compat_sys_utime(char __
 		tv[0].tv_usec = 0;
 		tv[1].tv_usec = 0;
 	}
-	return do_utimes(AT_FDCWD, filename, t ? tv : NULL);
+	return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
 }
 
 asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, struct compat_timeval __user *t)
@@ -84,7 +86,7 @@ asmlinkage long compat_sys_futimesat(uns
 		    get_user(tv[1].tv_usec, &t[1].tv_usec))
 			return -EFAULT;
 	}
-	return do_utimes(dfd, filename, t ? tv : NULL);
+	return do_utimes(dfd, filename, t ? tv : NULL, 0);
 }
 
 asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t)
@@ -197,6 +199,8 @@ asmlinkage long compat_sys_statfs(const 
 		struct kstatfs tmp;
 		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
 		if (!error)
+			error = faudit_statfs(nd.mnt->mnt_sb, &tmp);
+		if (!error)
 			error = put_compat_statfs(buf, &tmp);
 		path_release(&nd);
 	}
@@ -215,6 +219,8 @@ asmlinkage long compat_sys_fstatfs(unsig
 		goto out;
 	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp);
 	if (!error)
+		error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp);
+	if (!error)
 		error = put_compat_statfs(buf, &tmp);
 	fput(file);
 out:
@@ -265,6 +271,8 @@ asmlinkage long compat_sys_statfs64(cons
 		struct kstatfs tmp;
 		error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp);
 		if (!error)
+			error = faudit_statfs(nd.mnt->mnt_sb, &tmp);
+		if (!error)
 			error = put_compat_statfs64(buf, &tmp);
 		path_release(&nd);
 	}
@@ -286,6 +294,8 @@ asmlinkage long compat_sys_fstatfs64(uns
 		goto out;
 	error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp);
 	if (!error)
+		error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp);
+	if (!error)
 		error = put_compat_statfs64(buf, &tmp);
 	fput(file);
 out:
@@ -1444,6 +1454,61 @@ out:
 	return ret;
 }
 
+asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
+		qid_t id, void __user *addr)
+{
+	long ret;
+	unsigned int cmds;
+	mm_segment_t old_fs;
+	struct if_dqblk dqblk;
+	struct if32_dqblk {
+		__u32 dqb_bhardlimit[2];
+		__u32 dqb_bsoftlimit[2];
+		__u32 dqb_curspace[2];
+		__u32 dqb_ihardlimit[2];
+		__u32 dqb_isoftlimit[2];
+		__u32 dqb_curinodes[2];
+		__u32 dqb_btime[2];
+		__u32 dqb_itime[2];
+		__u32 dqb_valid;
+	} dqblk32;
+
+	cmds = cmd >> SUBCMDSHIFT;
+
+	switch (cmds) {
+		case Q_GETQUOTA:
+			old_fs = get_fs();
+			set_fs(KERNEL_DS);
+			ret = sys_quotactl(cmd, special, id, &dqblk);
+			set_fs(old_fs);
+			if (ret < 0)
+				break;
+
+			memcpy(&dqblk32, &dqblk, sizeof(dqblk32));
+			dqblk32.dqb_valid = dqblk.dqb_valid;
+			if (copy_to_user(addr, &dqblk32, sizeof(dqblk32)))
+				ret = -EFAULT;
+
+			break;
+		case Q_SETQUOTA:
+			ret = -EFAULT;
+			if (copy_from_user(&dqblk32, addr, sizeof(dqblk32)))
+				break;
+			memcpy(&dqblk, &dqblk32, sizeof(dqblk32));
+			dqblk.dqb_valid = dqblk32.dqb_valid;
+
+			old_fs = get_fs();
+			set_fs(KERNEL_DS);
+			ret = sys_quotactl(cmd, special, id, &dqblk);
+			set_fs(old_fs);
+			break;
+		default:
+			ret = sys_quotactl(cmd, special, id, addr);
+			break;
+	}
+	return ret;
+}
+
 #ifdef CONFIG_MMU
 
 #define free_arg_pages(bprm) do { } while (0)
@@ -1477,6 +1542,10 @@ int compat_do_execve(char * filename,
 	int retval;
 	int i;
 
+	retval = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL);
+	if (retval)
+		return retval;
+
 	retval = -ENOMEM;
 	bprm = kmalloc(sizeof(*bprm), GFP_KERNEL);
 	if (!bprm)
diff -upr linux-2.6.16.46-0.12.orig/fs/dcache.c linux-2.6.16.46-0.12-027test011/fs/dcache.c
--- linux-2.6.16.46-0.12.orig/fs/dcache.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/dcache.c	2007-08-28 17:35:36.000000000 +0400
@@ -28,11 +28,17 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/file.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <linux/security.h>
 #include <linux/seqlock.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
+#include <linux/kernel_stat.h>
+#include <net/inet_sock.h>
+
+#include <ub/ub_dcache.h>
+#include <ub/ub_dcache_op.h>
 
 /* #define DCACHE_DEBUG 1 */
 
@@ -44,7 +50,7 @@ static seqlock_t rename_lock __cacheline
 
 EXPORT_SYMBOL(dcache_lock);
 
-static kmem_cache_t *dentry_cache; 
+kmem_cache_t *dentry_cache;
 
 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
 
@@ -114,6 +120,29 @@ static void dentry_iput(struct dentry * 
 	}
 }
 
+/**
+ * d_kill - kill dentry and return parent
+ * @dentry: dentry to kill
+ *
+ * Called with dcache_lock and d_lock held and preemption disabled; releases
+ * the locks and undoes the caller's preempt_disable().  The dentry must
+ * already be unhashed and removed from the LRU.
+ * Returns the parent, or NULL if @dentry is the root of the dentry tree.
+ */
+static struct dentry *d_kill(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	list_del(&dentry->d_u.d_child);
+	dentry_stat.nr_dentry--;	/* For d_free, below */
+	preempt_enable_no_resched();
+	/* drops the locks; at that point nobody can reach this dentry */
+	dentry_iput(dentry);
+	parent = dentry->d_parent;
+	d_free(dentry);
+	return dentry == parent ? NULL : parent;
+}
+
 /* 
  * This is dput
  *
@@ -141,25 +170,18 @@ static void dentry_iput(struct dentry * 
  * they too may now get deleted.
  *
  * no dcache lock, please.
+ * preemption is disabled by the caller.
  */
 
-void dput(struct dentry *dentry)
+static void dput_recursive(struct dentry *dentry)
 {
-	if (!dentry)
-		return;
-
 repeat:
-	if (atomic_read(&dentry->d_count) == 1)
-		might_sleep();
 	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
-		return;
+		goto out_preempt;
 
 	spin_lock(&dentry->d_lock);
-	if (atomic_read(&dentry->d_count)) {
-		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
-		return;
-	}
+	if (atomic_read(&dentry->d_count))
+		goto out_unlock;
 
 	/*
 	 * AV: ->d_delete() is _NOT_ allowed to block now.
@@ -176,36 +198,43 @@ repeat:
   		list_add(&dentry->d_lru, &dentry_unused);
   		dentry_stat.nr_unused++;
   	}
+out_unlock:
  	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
+out_preempt:
+	preempt_enable();
 	return;
 
 unhash_it:
 	__d_drop(dentry);
-
-kill_it: {
-		struct dentry *parent;
-
-		/* If dentry was on d_lru list
-		 * delete it from there
-		 */
-  		if (!list_empty(&dentry->d_lru)) {
-  			list_del(&dentry->d_lru);
-  			dentry_stat.nr_unused--;
-  		}
-  		list_del(&dentry->d_u.d_child);
-		dentry_stat.nr_dentry--;	/* For d_free, below */
-		/*drops the locks, at that point nobody can reach this dentry */
-		dentry_iput(dentry);
-		parent = dentry->d_parent;
-		d_free(dentry);
-		if (dentry == parent)
-			return;
-		dentry = parent;
+kill_it:
+	/* If dentry was on d_lru list
+	 * delete it from there
+	 */
+	if (!list_empty(&dentry->d_lru)) {
+		list_del(&dentry->d_lru);
+		dentry_stat.nr_unused--;
+	}
+	dentry = d_kill(dentry);
+	if (dentry) {
+		preempt_disable();
 		goto repeat;
 	}
 }
 
+void dput(struct dentry *dentry)
+{
+	if (!dentry)
+		return;
+
+	if (atomic_read(&dentry->d_count) == 1)
+		might_sleep();
+
+	preempt_disable();
+	ub_dentry_uncharge(dentry);
+	dput_recursive(dentry);
+}
+
 /**
  * d_invalidate - invalidate a dentry
  * @dentry: dentry to invalidate
@@ -272,6 +301,8 @@ static inline struct dentry * __dget_loc
 		dentry_stat.nr_unused--;
 		list_del_init(&dentry->d_lru);
 	}
+
+	ub_dentry_charge_nofail(dentry);
 	return dentry;
 }
 
@@ -360,26 +391,49 @@ restart:
  * Throw away a dentry - free the inode, dput the parent.
  * This requires that the LRU list has already been
  * removed.
+ *
+ * If prune_parents is true, try to prune ancestors as well.
+ *
  * Called with dcache_lock, drops it and then regains.
  */
-static inline void prune_one_dentry(struct dentry * dentry)
+static inline void prune_one_dentry(struct dentry * dentry, int prune_parents)
 {
-	struct super_block *sb = dentry->d_sb;
-	struct dentry * parent;
-
 	__d_drop(dentry);
-	list_del(&dentry->d_u.d_child);
-	dentry_stat.nr_dentry--;	/* For d_free, below */
-	sb->s_prunes++;
-	dentry_iput(dentry);
-	parent = dentry->d_parent;
-	d_free(dentry);
-	if (parent != dentry)
-		dput(parent);
+	preempt_disable();
+	dentry = d_kill(dentry);
+	if (!prune_parents) {
+		/*
+		 * dentry is not in use, only child (not outside)
+		 * references change, so parent->d_inuse does not change
+		 */
+		if (dentry) {
+			preempt_disable();
+			dput_recursive(dentry);
+		}
+		spin_lock(&dcache_lock);
+		return;
+	}
+
+	/*
+	 * Prune ancestors.  Locking is simpler than in dput(),
+	 * because dcache_lock needs to be taken anyway.
+	 */
 	spin_lock(&dcache_lock);
-	sb->s_prunes--;
-	if (likely(!sb->s_prunes))
-		wake_up(&sb->s_wait_prunes);
+	while (dentry) {
+		if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
+			return;
+
+		if (dentry->d_op && dentry->d_op->d_delete)
+			dentry->d_op->d_delete(dentry);
+		if (!list_empty(&dentry->d_lru)) {
+			list_del(&dentry->d_lru);
+			dentry_stat.nr_unused--;
+		}
+		__d_drop(dentry);
+		preempt_disable();
+		dentry = d_kill(dentry);
+		spin_lock(&dcache_lock);
+	}
 }
 
 /**
@@ -387,6 +441,7 @@ static inline void prune_one_dentry(stru
  * @count: number of entries to try and free
  * @sb: if given, ignore dentries for other superblocks
  *         which are being unmounted.
+ * @prune_parents: if true, try to prune ancestors as well in one go
  *
  * Shrink the dcache. This is done when we need
  * more memory, or simply when we need to unmount
@@ -397,7 +452,7 @@ static inline void prune_one_dentry(stru
  * all the dentries are in use.
  */
  
-static void prune_dcache(int count, struct super_block *sb)
+static void prune_dcache(int count, struct super_block *sb, int prune_parents)
 {
 	spin_lock(&dcache_lock);
 	for (; count ; count--) {
@@ -457,7 +512,7 @@ static void prune_dcache(int count, stru
 		 * without taking the s_umount lock (I already hold it).
 		 */
 		if (sb && dentry->d_sb == sb) {
-			prune_one_dentry(dentry);
+			prune_one_dentry(dentry, prune_parents);
 			continue;
 		}
 		/*
@@ -472,7 +527,7 @@ static void prune_dcache(int count, stru
 		s_umount = &dentry->d_sb->s_umount;
 		if (down_read_trylock(s_umount)) {
 			if (dentry->d_sb->s_root != NULL) {
-				prune_one_dentry(dentry);
+				prune_one_dentry(dentry, prune_parents);
 				up_read(s_umount);
 				continue;
 			}
@@ -544,7 +599,8 @@ repeat:
 			spin_unlock(&dentry->d_lock);
 			continue;
 		}
-		prune_one_dentry(dentry);
+		prune_one_dentry(dentry, 1);
+		cond_resched_lock(&dcache_lock);
 		goto repeat;
 	}
 	spin_unlock(&dcache_lock);
@@ -682,60 +738,19 @@ out:
 	return found;
 }
 
-/*
- * A special version of wait_event(!sb->s_prunes) which takes the dcache_lock
- * when checking the condition and gives feedback if we slept.
- */
-static int wait_on_prunes(struct super_block *sb)
-{
-	DEFINE_WAIT(wait);
-	int slept = 0;
-
-#ifdef DCACHE_DEBUG
-	printk(KERN_DEBUG "%s: waiting for %d prunes\n", __FUNCTION__,
-	       sb->s_prunes);
-#endif
-
-	spin_lock(&dcache_lock);
-	for (;;) {
-		prepare_to_wait(&sb->s_wait_prunes, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (!sb->s_prunes)
-			break;
-		spin_unlock(&dcache_lock);
-		schedule();
-		slept = 1;
-		spin_lock(&dcache_lock);
-	}
-	spin_unlock(&dcache_lock);
-	finish_wait(&sb->s_wait_prunes, &wait);
-	return slept;
-}
-
 /**
  * shrink_dcache_parent - prune dcache
  * @parent: parent of entries to prune
  *
  * Prune the dcache to remove unused children of the parent dentry.
  */
-/*
- * If we slept on waiting for other prunes to finish, there maybe are
- * some dentries the d_lru list that we have "overlooked" the last
- * time we called select_parent(). Therefor lets restart in this case.
- */
+ 
 void shrink_dcache_parent(struct dentry * parent)
 {
 	int found;
-	struct super_block *sb = parent->d_sb;
 
- again:
 	while ((found = select_parent(parent)) != 0)
-		prune_dcache(found, parent->d_sb);
-
-	/* If we are called from generic_shutdown_super() during
-	 * umount of a filesystem, we want to check for other prunes */
-	if (!sb->s_root && wait_on_prunes(sb))
-		goto again;
+		prune_dcache(found, parent->d_sb, 1);
 }
 
 /**
@@ -774,7 +789,7 @@ void shrink_dcache_anon(struct super_blo
 			}
 		}
 		spin_unlock(&dcache_lock);
-		prune_dcache(found, sb);
+		prune_dcache(found, sb, 1);
 	} while(found);
 }
 
@@ -792,12 +807,18 @@ void shrink_dcache_anon(struct super_blo
  */
 static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
 {
+	int res = -1;
+
+	KSTAT_PERF_ENTER(shrink_dcache)
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
-			return -1;
-		prune_dcache(nr, NULL);
+			goto out;
+		prune_dcache(nr, NULL, 1);
 	}
-	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+	res = (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+out:
+	KSTAT_PERF_LEAVE(shrink_dcache)
+	return res;
 }
 
 /**
@@ -815,21 +836,26 @@ struct dentry *d_alloc(struct dentry * p
 	struct dentry *dentry;
 	char *dname;
 
+	dname = NULL;
+	if (name->len > DNAME_INLINE_LEN-1) {
+		dname = kmalloc(name->len + 1, GFP_KERNEL);
+		if (!dname)
+			goto err_name;
+	}
+
+	ub_dentry_alloc_start();
 	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 
 	if (!dentry)
-		return NULL;
+		goto err_alloc;
 
-	if (name->len > DNAME_INLINE_LEN-1) {
-		dname = kmalloc(name->len + 1, GFP_KERNEL);
-		if (!dname) {
-			kmem_cache_free(dentry_cache, dentry); 
-			return NULL;
-		}
-	} else  {
+	preempt_disable();
+	if (dname == NULL)
 		dname = dentry->d_iname;
-	}	
 	dentry->d_name.name = dname;
 
+	if (ub_dentry_alloc(dentry))
+		goto err_charge;
+
 	dentry->d_name.len = name->len;
 	dentry->d_name.hash = name->hash;
 	memcpy(dname, name->name, name->len);
@@ -860,12 +886,27 @@ struct dentry *d_alloc(struct dentry * p
 	}
 
 	spin_lock(&dcache_lock);
-	if (parent)
+	if (parent) {
 		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+		if (parent->d_flags & DCACHE_VIRTUAL)
+			dentry->d_flags |= DCACHE_VIRTUAL;
+	}
 	dentry_stat.nr_dentry++;
 	spin_unlock(&dcache_lock);
+	preempt_enable();
+	ub_dentry_alloc_end();
 
 	return dentry;
+
+err_charge:
+	preempt_enable();
+	kmem_cache_free(dentry_cache, dentry);
+err_alloc:
+	if (name->len > DNAME_INLINE_LEN - 1)
+		kfree(dname);
+	ub_dentry_alloc_end();
+err_name:
+	return NULL;
 }
 
 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
@@ -1153,12 +1194,12 @@ struct dentry * __d_lookup(struct dentry
 	unsigned int hash = name->hash;
 	const unsigned char *str = name->name;
 	struct hlist_head *head = d_hash(parent,hash);
-	struct dentry *found = NULL;
 	struct hlist_node *node;
-	struct dentry *dentry;
+	struct dentry *dentry, *found;
 
 	rcu_read_lock();
 	
+	found = NULL;
 	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
 		struct qstr *qstr;
 
@@ -1195,6 +1236,8 @@ struct dentry * __d_lookup(struct dentry
 		if (!d_unhashed(dentry)) {
 			atomic_inc(&dentry->d_count);
 			found = dentry;
+			if (ub_dentry_charge(found))
+				goto charge_failure;
 		}
 		spin_unlock(&dentry->d_lock);
 		break;
@@ -1204,6 +1247,14 @@ next:
  	rcu_read_unlock();
 
  	return found;
+
+charge_failure:
+	spin_unlock(&found->d_lock);
+	rcu_read_unlock();
+	/* dentry is now unhashed, just kill it */
+	dput(found);
+	/* ... and fail lookup */
+	return NULL;
 }
 
 /**
@@ -1454,6 +1505,32 @@ already_unhashed:
 }
 
 /**
+ * __d_path_add_deleted - prepend "(deleted) " text
+ * @end: a pointer to the character after free space at the beginning of the
+ *       buffer
+ * @buflen: remaining free space
+ */
+static inline char * __d_path_add_deleted(char * end, int buflen)
+{
+	buflen -= 10;
+	if (buflen < 0)
+		return ERR_PTR(-ENAMETOOLONG);
+	end -= 10;
+	memcpy(end, "(deleted) ", 10);
+	return end;
+}
+
+/**
+ * d_root_check - checks if dentry is accessible from current's fs root
+ * @dentry: dentry to be verified
+ * @vfsmnt: vfsmnt to which the dentry belongs
+ */
+int d_root_check(struct dentry *dentry, struct vfsmount *vfsmnt)
+{
+	return PTR_ERR(d_path(dentry, vfsmnt, NULL, 0));
+}
+
+/**
  * d_path - return the path of a dentry
  * @dentry: dentry to report
  * @vfsmnt: vfsmnt to which the dentry belongs
@@ -1469,41 +1546,40 @@ already_unhashed:
  *
  * "buflen" should be positive. Caller holds the dcache_lock.
  */
-static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
+char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
 			struct dentry *root, struct vfsmount *rootmnt,
 			char *buffer, int buflen)
 {
 	char * end = buffer+buflen;
-	char * retval;
+	char * retval = NULL;
 	int namelen;
+	int deleted;
+	struct vfsmount *oldvfsmnt;
 
-	*--end = '\0';
-	buflen--;
-	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
-		buflen -= 10;
-		end -= 10;
-		if (buflen < 0)
+	oldvfsmnt = vfsmnt;
+	deleted = (!IS_ROOT(dentry) && d_unhashed(dentry));
+	if (buffer != NULL) {
+		*--end = '\0';
+		buflen--;
+
+		if (buflen < 1)
 			goto Elong;
-		memcpy(end, " (deleted)", 10);
+		/* Get '/' right */
+		retval = end-1;
+		*retval = '/';
 	}
 
-	if (buflen < 1)
-		goto Elong;
-	/* Get '/' right */
-	retval = end-1;
-	*retval = '/';
-
 	for (;;) {
 		struct dentry * parent;
 
 		if (dentry == root && vfsmnt == rootmnt)
 			break;
 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
-			/* Global root? */
+			/* root of a tree? */
 			spin_lock(&vfsmount_lock);
 			if (vfsmnt->mnt_parent == vfsmnt) {
 				spin_unlock(&vfsmount_lock);
-				goto global_root;
+				goto other_root;
 			}
 			dentry = vfsmnt->mnt_mountpoint;
 			vfsmnt = vfsmnt->mnt_parent;
@@ -1512,30 +1588,55 @@ static char * __d_path( struct dentry *d
 		}
 		parent = dentry->d_parent;
 		prefetch(parent);
+		if (buffer != NULL) {
+			namelen = dentry->d_name.len;
+			buflen -= namelen + 1;
+			if (buflen < 0)
+				goto Elong;
+			end -= namelen;
+			memcpy(end, dentry->d_name.name, namelen);
+			*--end = '/';
+			retval = end;
+		}
+		dentry = parent;
+	}
+	/* the given root point is reached */
+finish:
+	if (buffer != NULL && deleted)
+		retval = __d_path_add_deleted(end, buflen);
+	return retval;
+
+other_root:
+	/*
+	 * We traversed the tree upward and reached a root, but the given
+	 * lookup terminal point wasn't encountered.  It means either that the
+	 * dentry is out of our scope or belongs to an abstract space like
+	 * sock_mnt or pipe_mnt.  Check for it.
+	 *
+	 * There are different options to check it.
+	 * We may assume that any dentry tree is unreachable unless it's
+	 * connected to `root' (defined as fs root of init aka child reaper)
+	 * and expose all paths that are not connected to it.
+	 * The other option is to allow exposing of known abstract spaces
+	 * explicitly and hide the path information for other cases.
+	 * This approach is safer, so let's take it.  2001/04/22  SAW
+	 */
+	if (!(oldvfsmnt->mnt_sb->s_flags & MS_NOUSER))
+		return ERR_PTR(-EINVAL);
+	if (buffer != NULL) {
 		namelen = dentry->d_name.len;
-		buflen -= namelen + 1;
+		buflen -= namelen;
 		if (buflen < 0)
 			goto Elong;
-		end -= namelen;
-		memcpy(end, dentry->d_name.name, namelen);
-		*--end = '/';
-		retval = end;
-		dentry = parent;
+		retval -= namelen-1;	/* hit the slash */
+		memcpy(retval, dentry->d_name.name, namelen);
 	}
+	goto finish;
 
-	return retval;
-
-global_root:
-	namelen = dentry->d_name.len;
-	buflen -= namelen;
-	if (buflen < 0)
-		goto Elong;
-	retval -= namelen-1;	/* hit the slash */
-	memcpy(retval, dentry->d_name.name, namelen);
-	return retval;
 Elong:
 	return ERR_PTR(-ENAMETOOLONG);
 }
+EXPORT_SYMBOL(__d_path);
 
 /* write full pathname into buffer and return start of pathname */
 char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
@@ -1557,6 +1658,229 @@ char * d_path(struct dentry *dentry, str
 	return res;
 }
 
+#ifdef CONFIG_VE
+#include <net/sock.h>
+#include <linux/ip.h>
+#include <linux/file.h>
+#include <linux/namespace.h>
+#include <linux/vzratelimit.h>
+
+/* Set DCACHE_VIRTUAL on @d and on every dentry below it (pre-order walk). */
+static void mark_sub_tree_virtual(struct dentry *d)
+{
+	struct dentry *const top = d;
+
+	for (;;) {
+		spin_lock(&d->d_lock);
+		d->d_flags |= DCACHE_VIRTUAL;
+		spin_unlock(&d->d_lock);
+
+		/* descend to the first child when there is one */
+		if (!list_empty(&d->d_subdirs)) {
+			d = list_entry(d->d_subdirs.next,
+					struct dentry, d_u.d_child);
+			continue;
+		}
+		if (d == top)
+			return;
+		/* climb while d is the last child of its parent */
+		while (d->d_parent->d_subdirs.prev == &d->d_u.d_child) {
+			d = d->d_parent;
+			if (d == top)
+				return;
+		}
+		/* continue with the next sibling */
+		d = list_entry(d->d_u.d_child.next,
+				struct dentry, d_u.d_child);
+	}
+}
+
+/* Mark the tree at @d, and the root tree of every mount below @m, virtual. */
+void mark_tree_virtual(struct vfsmount *m, struct dentry *d)
+{
+	struct vfsmount *const top = m;
+
+	spin_lock(&dcache_lock);
+	spin_lock(&vfsmount_lock);
+	for (;;) {
+		mark_sub_tree_virtual(d);
+		if (!list_empty(&m->mnt_mounts)) {
+			m = list_entry(m->mnt_mounts.next,
+					struct vfsmount, mnt_child);
+			d = m->mnt_root;
+			continue;
+		}
+		if (m == top)
+			break;
+		/* climb while m is the last child mount of its parent */
+		while (m->mnt_parent->mnt_mounts.prev == &m->mnt_child) {
+			m = m->mnt_parent;
+			if (m == top)
+				goto out;
+		}
+		m = list_entry(m->mnt_child.next,
+				struct vfsmount, mnt_child);
+		d = m->mnt_root;
+	}
+out:
+	spin_unlock(&vfsmount_lock);
+	spin_unlock(&dcache_lock);
+}
+EXPORT_SYMBOL(mark_tree_virtual);
+
+static struct vz_rate_info area_ri = { 20, 10*HZ };
+#define VE_AREA_ACC_CHECK	0x0001	/* VE: alert on access */
+#define VE_AREA_ACC_DENY	0x0002	/* VE: deny access */
+#define VE_AREA_EXEC_CHECK	0x0010	/* VE: alert on exec */
+#define VE_AREA_EXEC_DENY	0x0020	/* VE: deny exec */
+#define VE0_AREA_ACC_CHECK	0x0100	/* VE0: alert on access */
+#define VE0_AREA_ACC_DENY	0x0200	/* VE0: deny access */
+#define VE0_AREA_EXEC_CHECK	0x1000	/* VE0: alert on exec */
+#define VE0_AREA_EXEC_DENY	0x2000	/* VE0: deny exec */
+int ve_area_access_check = 0;	/* bitmask of the *_AREA_* flags above */
+
+/*
+ * Print one KERN_ALERT line for every SOCK_STREAM socket of family PF_INET
+ * or PF_INET6 found in the file descriptor table of @tsk.
+ */
+static void print_connection_info(struct task_struct *tsk)
+{
+	struct files_struct *files = get_files_struct(tsk);
+	struct fdtable *fdt;
+	int fd;
+
+	if (files == NULL)
+		return;
+
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	for (fd = 0; fd < fdt->max_fds; fd++) {
+		struct file *filp = fdt->fd[fd];
+		struct inode *ino;
+		struct socket *sock;
+		struct sock *sk;
+		struct inet_sock *inet;
+
+		if (filp == NULL)
+			continue;
+		ino = filp->f_dentry->d_inode;
+		if (!S_ISSOCK(ino->i_mode))
+			continue;
+		sock = SOCKET_I(ino);
+		if (sock == NULL)
+			continue;
+
+		sk = sock->sk;
+		if (sk->sk_type != SOCK_STREAM)
+			continue;
+		if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+			continue;
+		inet = inet_sk(sk);
+		printk(KERN_ALERT "connection from %u.%u.%u.%u:%u to port %u\n",
+				NIPQUAD(inet->daddr), ntohs(inet->dport),
+				inet->num);
+	}
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+}
+
+/*
+ * Rate-limited alert: log the path of @dentry, the current task and its
+ * real parent (each with its stream connections), then dump the stack.
+ */
+static void check_alert(struct vfsmount *vfsmnt, struct dentry *dentry,
+		char *str)
+{
+	struct task_struct *tsk = current, *parent;
+	struct super_block *sb;
+	unsigned long page;
+	char *p = ERR_PTR(-ENOMEM);
+
+	if (!vz_ratelimit(&area_ri))
+		return;
+
+	page = __get_free_page(GFP_KERNEL);
+	if (page != 0) {
+		spin_lock(&dcache_lock);
+		p = __d_path(dentry, vfsmnt, tsk->fs->root, tsk->fs->rootmnt,
+				(char *)page, PAGE_SIZE);
+		spin_unlock(&dcache_lock);
+	}
+	if (IS_ERR(p))
+		p = "(undefined)";
+
+	sb = dentry->d_sb;
+	printk(KERN_ALERT "%s check alert! file:[%s] from %d/%s, dev%x\n"
+			"Task %d/%d[%s] from VE%d, execenv %d\n",
+			str, p,	sb->s_type->owner_env->veid,
+			sb->s_type->name, sb->s_dev,
+			tsk->pid, virt_pid(tsk), tsk->comm,
+			VE_TASK_INFO(tsk)->owner_env->veid,
+			get_exec_env()->veid);
+
+	free_page(page);
+	print_connection_info(tsk);
+
+	read_lock(&tasklist_lock);
+	parent = tsk->real_parent;
+	get_task_struct(parent);
+	read_unlock(&tasklist_lock);
+
+	printk(KERN_ALERT "Parent %d/%d[%s] from VE%d\n",
+			parent->pid, virt_pid(parent), parent->comm,
+			VE_TASK_INFO(parent)->owner_env->veid);
+	print_connection_info(parent);
+	put_task_struct(parent);
+	dump_stack();
+}
+#endif
+
+/* Alert on and/or refuse access to @dentry, as set in ve_area_access_check. */
+int check_area_access_ve(struct dentry *dentry, struct vfsmount *mnt)
+{
+#ifdef CONFIG_VE
+	const int ve0 = ve_is_super(get_exec_env()) ? 1 : 0;
+	const int virt = dentry->d_flags & DCACHE_VIRTUAL;
+	const int mode = ve_area_access_check;
+	int alert, check, deny;
+
+	/* suspicious: VE0 on a virtual dentry, or a VE on a non-virtual one */
+	alert = ve0 ? virt : !virt;
+	check = mode & (ve0 ? VE0_AREA_ACC_CHECK : VE_AREA_ACC_CHECK);
+	deny = mode & (ve0 ? VE0_AREA_ACC_DENY : VE_AREA_ACC_DENY);
+
+	/* the alert is logged first, then the access is refused if asked */
+	if (alert && check)
+		check_alert(mnt, dentry, "Access");
+	if (alert && deny)
+		return -EACCES;
+#endif
+	return 0;
+}
+
+/* Alert on and/or refuse exec of @dentry, as set in ve_area_access_check. */
+int check_area_execute_ve(struct dentry *dentry, struct vfsmount *mnt)
+{
+#ifdef CONFIG_VE
+	const int ve0 = ve_is_super(get_exec_env()) ? 1 : 0;
+	const int virt = dentry->d_flags & DCACHE_VIRTUAL;
+	const int mode = ve_area_access_check;
+	int alert, check, deny;
+
+	/* suspicious: VE0 on a virtual dentry, or a VE on a non-virtual one */
+	alert = ve0 ? virt : !virt;
+	check = mode & (ve0 ? VE0_AREA_EXEC_CHECK : VE_AREA_EXEC_CHECK);
+	deny = mode & (ve0 ? VE0_AREA_EXEC_DENY : VE_AREA_EXEC_DENY);
+
+	/* the alert is logged first, then the exec is refused if asked */
+	if (alert && check)
+		check_alert(mnt, dentry, "Exec");
+	if (alert && deny)
+		return -EACCES;
+#endif
+	return 0;
+}
+
 /*
  * NOTE! The user-level library version returns a
  * character pointer. The kernel system call just
@@ -1693,10 +2017,12 @@ resume:
 			goto repeat;
 		}
 		atomic_dec(&dentry->d_count);
+		ub_dentry_uncharge_locked(dentry);
 	}
 	if (this_parent != root) {
 		next = this_parent->d_u.d_child.next;
 		atomic_dec(&this_parent->d_count);
+		ub_dentry_uncharge_locked(this_parent);
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
diff -upr linux-2.6.16.46-0.12.orig/fs/devpts/inode.c linux-2.6.16.46-0.12-027test011/fs/devpts/inode.c
--- linux-2.6.16.46-0.12.orig/fs/devpts/inode.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/devpts/inode.c	2007-08-28 17:35:31.000000000 +0400
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/ve.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/namei.h>
@@ -21,16 +22,17 @@
 
 #define DEVPTS_SUPER_MAGIC 0x1cd1
 
+struct devpts_config devpts_config = {.mode = 0600};
+
+#ifndef CONFIG_VE
 static struct vfsmount *devpts_mnt;
 static struct dentry *devpts_root;
-
-static struct {
-	int setuid;
-	int setgid;
-	uid_t   uid;
-	gid_t   gid;
-	umode_t mode;
-} config = {.mode = 0600};
+#define config	devpts_config
+#else
+#define devpts_mnt	(get_exec_env()->devpts_mnt)
+#define devpts_root	(get_exec_env()->devpts_root)
+#define config		(*(get_exec_env()->devpts_config))
+#endif
 
 static int devpts_remount(struct super_block *sb, int *flags, char *data)
 {
@@ -56,7 +58,8 @@ static int devpts_remount(struct super_b
 		} else if (sscanf(this_char, "mode=%o%c", &n, &dummy) == 1)
 			mode = n & ~S_IFMT;
 		else {
-			printk("devpts: called with bogus options\n");
+			ve_printk(VE_LOG,
+					"devpts: called with bogus options\n");
 			return -EINVAL;
 		}
 	}
@@ -114,13 +117,15 @@ static struct super_block *devpts_get_sb
 	return get_sb_single(fs_type, flags, data, devpts_fill_super);
 }
 
-static struct file_system_type devpts_fs_type = {
+struct file_system_type devpts_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "devpts",
 	.get_sb		= devpts_get_sb,
 	.kill_sb	= kill_anon_super,
 };
 
+EXPORT_SYMBOL(devpts_fs_type);
+
 /*
  * The normal naming convention is simply /dev/pts/<number>; this conforms
  * to the System V naming convention
@@ -212,6 +217,7 @@ static int __init init_devpts_fs(void)
 
 static void __exit exit_devpts_fs(void)
 {
+	/* the code is never called, the argument is irrelevant */
 	unregister_filesystem(&devpts_fs_type);
 	mntput(devpts_mnt);
 }
diff -upr linux-2.6.16.46-0.12.orig/fs/direct-io.c linux-2.6.16.46-0.12-027test011/fs/direct-io.c
--- linux-2.6.16.46-0.12.orig/fs/direct-io.c	2007-08-24 19:28:30.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/direct-io.c	2007-08-28 17:35:30.000000000 +0400
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/bio.h>
 #include <linux/wait.h>
 #include <linux/err.h>
@@ -668,6 +669,13 @@ submit_page_section(struct dio *dio, str
 {
 	int ret = 0;
 
+	if (dio->rw & WRITE) {
+		/*
+		 * Read accounting is performed in submit_bio()
+		 */
+		task_io_account_write(page, len, 1);
+	}
+
 	/*
 	 * Can we just grow the current page's presence in the dio?
 	 */
diff -upr linux-2.6.16.46-0.12.orig/fs/dquot.c linux-2.6.16.46-0.12-027test011/fs/dquot.c
--- linux-2.6.16.46-0.12.orig/fs/dquot.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/dquot.c	2007-08-28 17:35:33.000000000 +0400
@@ -158,7 +158,9 @@ static struct quota_format_type *find_qu
 	struct quota_format_type *actqf;
 
 	spin_lock(&dq_list_lock);
-	for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next);
+	for (actqf = quota_formats;
+		 actqf && (actqf->qf_fmt_id != id || actqf->qf_ops == NULL);
+						 actqf = actqf->qf_next);
 	if (!actqf || !try_module_get(actqf->qf_owner)) {
 		int qm;
 
diff -upr linux-2.6.16.46-0.12.orig/fs/eventpoll.c linux-2.6.16.46-0.12-027test011/fs/eventpoll.c
--- linux-2.6.16.46-0.12.orig/fs/eventpoll.c	2007-08-24 19:28:32.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/eventpoll.c	2007-08-28 17:35:33.000000000 +0400
@@ -105,11 +105,6 @@
 #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
 
 
-struct epoll_filefd {
-	struct file *file;
-	int fd;
-};
-
 /*
  * Node that is linked into the "wake_task_list" member of the "struct poll_safewake".
  * It is used to keep track on all tasks that are currently inside the wake_up() code
@@ -132,36 +127,6 @@ struct poll_safewake {
 	spinlock_t lock;
 };
 
-/*
- * This structure is stored inside the "private_data" member of the file
- * structure and rapresent the main data sructure for the eventpoll
- * interface.
- */
-struct eventpoll {
-	/* Protect the this structure access */
-	rwlock_t lock;
-
-	/*
-	 * This semaphore is used to ensure that files are not removed
-	 * while epoll is using them. This is read-held during the event
-	 * collection loop and it is write-held during the file cleanup
-	 * path, the epoll file exit code and the ctl operations.
-	 */
-	struct rw_semaphore sem;
-
-	/* Wait queue used by sys_epoll_wait() */
-	wait_queue_head_t wq;
-
-	/* Wait queue used by file->poll() */
-	wait_queue_head_t poll_wait;
-
-	/* List of ready file descriptors */
-	struct list_head rdllist;
-
-	/* RB-Tree root used to store monitored fd structs */
-	struct rb_root rbr;
-};
-
 /* Wait structure used by the poll hooks */
 struct eppoll_entry {
 	/* List header used to link this structure to the "struct epitem" */
@@ -180,51 +145,6 @@ struct eppoll_entry {
 	wait_queue_head_t *whead;
 };
 
-/*
- * Each file descriptor added to the eventpoll interface will
- * have an entry of this type linked to the hash.
- */
-struct epitem {
-	/* RB-Tree node used to link this structure to the eventpoll rb-tree */
-	struct rb_node rbn;
-
-	/* List header used to link this structure to the eventpoll ready list */
-	struct list_head rdllink;
-
-	/* The file descriptor information this item refers to */
-	struct epoll_filefd ffd;
-
-	/* Number of active wait queue attached to poll operations */
-	int nwait;
-
-	/* List containing poll wait queues */
-	struct list_head pwqlist;
-
-	/* The "container" of this item */
-	struct eventpoll *ep;
-
-	/* The structure that describe the interested events and the source fd */
-	struct epoll_event event;
-
-	/*
-	 * Used to keep track of the usage count of the structure. This avoids
-	 * that the structure will desappear from underneath our processing.
-	 */
-	atomic_t usecnt;
-
-	/* List header used to link this item to the "struct file" items list */
-	struct list_head fllink;
-
-	/* List header used to link the item to the transfer list */
-	struct list_head txlink;
-
-	/*
-	 * This is used during the collection/transfer of events to userspace
-	 * to pin items empty events set.
-	 */
-	unsigned int revents;
-};
-
 /* Wrapper struct used by poll queueing */
 struct ep_pqueue {
 	poll_table pt;
@@ -237,14 +157,10 @@ static void ep_poll_safewake_init(struct
 static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
 static int ep_alloc(struct eventpoll **pep);
 static void ep_free(struct eventpoll *ep);
-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
 static void ep_use_epitem(struct epitem *epi);
-static void ep_release_epitem(struct epitem *epi);
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
 				 poll_table *pt);
 static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi);
-static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-		     struct file *tfile, int fd);
 static int ep_modify(struct eventpoll *ep, struct epitem *epi,
 		     struct epoll_event *event);
 static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi);
@@ -272,7 +188,8 @@ static struct super_block *eventpollfs_g
 /*
  * This semaphore is used to serialize ep_free() and eventpoll_release_file().
  */
-static struct semaphore epsem;
+struct semaphore epsem;
+EXPORT_SYMBOL_GPL(epsem);
 
 /* Safe wake up implementation */
 static struct poll_safewake psw;
@@ -287,10 +204,11 @@ static kmem_cache_t *pwq_cache;
 static struct vfsmount *eventpoll_mnt;
 
 /* File callbacks that implement the eventpoll file behaviour */
-static struct file_operations eventpoll_fops = {
+struct file_operations eventpoll_fops = {
 	.release	= ep_eventpoll_close,
 	.poll		= ep_eventpoll_poll
 };
+EXPORT_SYMBOL_GPL(eventpoll_fops);
 
 /*
  * This is used to register the virtual file system from where
@@ -540,7 +458,7 @@ eexit_1:
 		     current, size, error));
 	return error;
 }
-
+EXPORT_SYMBOL_GPL(sys_epoll_create);
 
 /*
  * The following function implements the controller interface for
@@ -850,7 +768,7 @@ static void ep_free(struct eventpoll *ep
  * the returned item, so the caller must call ep_release_epitem()
  * after finished using the "struct epitem".
  */
-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
+struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 {
 	int kcmp;
 	unsigned long flags;
@@ -880,6 +798,7 @@ static struct epitem *ep_find(struct eve
 
 	return epir;
 }
+EXPORT_SYMBOL_GPL(ep_find);
 
 
 /*
@@ -898,13 +817,13 @@ static void ep_use_epitem(struct epitem 
  * has finished using the structure. It might lead to freeing the
  * structure itself if the count goes to zero.
  */
-static void ep_release_epitem(struct epitem *epi)
+void ep_release_epitem(struct epitem *epi)
 {
 
 	if (atomic_dec_and_test(&epi->usecnt))
 		kmem_cache_free(epi_cache, epi);
 }
-
+EXPORT_SYMBOL_GPL(ep_release_epitem);
 
 /*
  * This is the callback that is used to add our wait queue to the
@@ -950,7 +869,7 @@ static void ep_rbtree_insert(struct even
 }
 
 
-static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 		     struct file *tfile, int fd)
 {
 	int error, revents, pwake = 0;
@@ -1042,6 +961,7 @@ eexit_2:
 eexit_1:
 	return error;
 }
+EXPORT_SYMBOL_GPL(ep_insert);
 
 
 /*
diff -upr linux-2.6.16.46-0.12.orig/fs/exec.c linux-2.6.16.46-0.12-027test011/fs/exec.c
--- linux-2.6.16.46-0.12.orig/fs/exec.c	2007-08-24 19:28:30.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/exec.c	2007-08-28 17:35:36.000000000 +0400
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/mman.h>
+#include <linux/virtinfo.h>
 #include <linux/a.out.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
@@ -55,6 +56,8 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 
+#include <ub/ub_vmpages.h>
+
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
@@ -66,6 +69,8 @@ int suid_dumpable = 0;
 EXPORT_SYMBOL(suid_dumpable);
 /* The maximal length of core_pattern is also specified in sysctl.c */
 
+int sysctl_at_vsyscall;
+
 static struct linux_binfmt *formats;
 static DEFINE_RWLOCK(binfmt_lock);
 
@@ -311,6 +316,10 @@ void install_arg_page(struct vm_area_str
 	pte_t * pte;
 	pte_t pte_val;
 	spinlock_t *ptl;
+	struct page_beancounter *pb;
+
+	if (unlikely(pb_alloc(&pb)))
+		goto out_nopb;
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto out;
@@ -328,12 +337,17 @@ void install_arg_page(struct vm_area_str
 	pte_val = pte_mkdirty(pte_mkwrite(mk_pte(page, vma->vm_page_prot)));
 	page_check_writable(page, pte_val, 2);
 	set_pte_at(mm, address, pte, pte_val);
+	pb_add_ref(page, mm, &pb);
+	ub_unused_privvm_dec(mm, vma);
+	pb_free(&pb);
 	page_add_new_anon_rmap(page, vma, address);
 	pte_unmap_unlock(pte, ptl);
 
 	/* no need for flush_tlb */
 	return;
 out:
+	pb_free(&pb);
+out_nopb:
 	__free_page(page);
 	force_sig(SIGKILL, current);
 }
@@ -408,9 +422,14 @@ int setup_arg_pages(struct linux_binprm 
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
-	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	ret = -ENOMEM;
+	if (ub_memory_charge(mm, arg_size, VM_STACK_FLAGS | mm->def_flags,
+				NULL, UB_SOFT))
+		goto fail_charge;
+
+	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL | __GFP_SOFT_UBC);
 	if (!mpnt)
-		return -ENOMEM;
+		goto fail_alloc;
 
 	memset(mpnt, 0, sizeof(*mpnt));
 
@@ -435,11 +454,8 @@ int setup_arg_pages(struct linux_binprm 
 			mpnt->vm_flags = VM_STACK_FLAGS;
 		mpnt->vm_flags |= mm->def_flags;
 		mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7];
-		if ((ret = insert_vm_struct(mm, mpnt))) {
-			up_write(&mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, mpnt);
-			return ret;
-		}
+		if ((ret = insert_vm_struct(mm, mpnt)))
+			goto fail_insert;
 		mm->stack_vm = mm->total_vm = vma_pages(mpnt);
 	}
 
@@ -454,6 +470,14 @@ int setup_arg_pages(struct linux_binprm 
 	up_write(&mm->mmap_sem);
 	
 	return 0;
+
+fail_insert:
+	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, mpnt);
+fail_alloc:
+	ub_memory_uncharge(mm, arg_size, VM_STACK_FLAGS | mm->def_flags, NULL);
+fail_charge:
+	return ret;
 }
 
 EXPORT_SYMBOL(setup_arg_pages);
@@ -538,10 +562,11 @@ int kernel_read(struct file *file, unsig
 
 EXPORT_SYMBOL(kernel_read);
 
-static int exec_mmap(struct mm_struct *mm)
+static int exec_mmap(struct linux_binprm *bprm)
 {
 	struct task_struct *tsk;
-	struct mm_struct * old_mm, *active_mm;
+	struct mm_struct *old_mm, *active_mm, *mm;
+	int ret;
 
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
@@ -563,6 +588,9 @@ static int exec_mmap(struct mm_struct *m
 			return -EINTR;
 		}
 	}
+	ret = 0;
+	mm = bprm->mm;
+	mm->vps_dumpable = 1;
 	task_lock(tsk);
 	active_mm = tsk->active_mm;
 	tsk->mm = mm;
@@ -570,14 +598,24 @@ static int exec_mmap(struct mm_struct *m
 	activate_mm(active_mm, mm);
 	task_unlock(tsk);
 	arch_pick_mmap_layout(mm);
+	bprm->mm = NULL;		/* We're using it now */
+
+#ifdef CONFIG_VZ_GENCALLS
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_EXECMMAP,
+				bprm) & NOTIFY_FAIL) {
+		/* similar to binfmt_elf */
+		send_sig(SIGKILL, current, 0);
+		ret = -ENOMEM;
+	}
+#endif
 	if (old_mm) {
 		up_read(&old_mm->mmap_sem);
 		if (active_mm != old_mm) BUG();
 		mmput(old_mm);
-		return 0;
+		return ret;
 	}
 	mmdrop(active_mm);
-	return 0;
+	return ret;
 }
 
 /*
@@ -668,7 +706,7 @@ static int de_thread(struct task_struct 
 	 */
 	if (!thread_group_leader(current)) {
 		struct task_struct *parent;
-		struct dentry *proc_dentry1, *proc_dentry2;
+		struct dentry *proc_dentry1[2], *proc_dentry2[2];
 		unsigned long ptrace;
 
 		/*
@@ -682,8 +720,8 @@ static int de_thread(struct task_struct 
 
 		spin_lock(&leader->proc_lock);
 		spin_lock(&current->proc_lock);
-		proc_dentry1 = proc_pid_unhash(current);
-		proc_dentry2 = proc_pid_unhash(leader);
+		proc_pid_unhash(current, proc_dentry1);
+		proc_pid_unhash(leader, proc_dentry2);
 		write_lock_irq(&tasklist_lock);
 
 		BUG_ON(leader->tgid != current->tgid);
@@ -840,7 +878,7 @@ int flush_old_exec(struct linux_binprm *
 {
 	char * name;
 	int i, ch, retval;
-	struct files_struct *files;
+	struct files_struct *files, *old;
 	char tcomm[sizeof(current->comm)];
 
 	/*
@@ -863,12 +901,10 @@ int flush_old_exec(struct linux_binprm *
 	/*
 	 * Release all of the old mmap stuff
 	 */
-	retval = exec_mmap(bprm->mm);
+	retval = exec_mmap(bprm);
 	if (retval)
 		goto mmap_failed;
 
-	bprm->mm = NULL;		/* We're using it now */
-
 	/* This is the point of no return */
 	steal_locks(files);
 	put_files_struct(files);
@@ -920,8 +956,11 @@ int flush_old_exec(struct linux_binprm *
 	return 0;
 
 mmap_failed:
-	put_files_struct(current->files);
+	old = current->files;
+	task_lock(current);
 	current->files = files;
+	task_unlock(current);
+	put_files_struct(old);
 out:
 	return retval;
 }
@@ -1173,6 +1212,10 @@ int do_execve(char * filename,
 	int retval;
 	int i;
 
+	retval = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL);
+	if (retval)
+		return retval;
+
 	retval = -ENOMEM;
 	bprm = kmalloc(sizeof(*bprm), GFP_KERNEL);
 	if (!bprm)
@@ -1320,7 +1363,7 @@ static void format_corename(char *corena
 			case 'p':
 				pid_in_pattern = 1;
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", current->tgid);
+					      "%d", virt_tgid(current));
 				if (rc > out_end - out_ptr)
 					goto out;
 				out_ptr += rc;
@@ -1364,7 +1407,7 @@ static void format_corename(char *corena
 			case 'h':
 				down_read(&uts_sem);
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%s", system_utsname.nodename);
+					      "%s", ve_utsname.nodename);
 				up_read(&uts_sem);
 				if (rc > out_end - out_ptr)
 					goto out;
@@ -1392,7 +1435,7 @@ static void format_corename(char *corena
 	if (!pid_in_pattern
             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
 		rc = snprintf(out_ptr, out_end - out_ptr,
-			      ".%d", current->tgid);
+			      ".%d", virt_tgid(current));
 		if (rc > out_end - out_ptr)
 			goto out;
 		out_ptr += rc;
@@ -1418,7 +1461,7 @@ static void zap_threads (struct mm_struc
 	}
 
 	read_lock(&tasklist_lock);
-	do_each_thread(g,p)
+	do_each_thread_ve(g,p)
 		if (mm == p->mm && p != tsk) {
 			force_sig_specific(SIGKILL, p);
 			mm->core_waiters++;
@@ -1426,7 +1469,7 @@ static void zap_threads (struct mm_struc
 			    unlikely(p->parent->mm == mm))
 				traced = 1;
 		}
-	while_each_thread(g,p);
+	while_each_thread_ve(g,p);
 
 	read_unlock(&tasklist_lock);
 
@@ -1438,12 +1481,12 @@ static void zap_threads (struct mm_struc
 		 * coredump to finish.  Detach them so they can both die.
 		 */
 		write_lock_irq(&tasklist_lock);
-		do_each_thread(g,p) {
+		do_each_thread_ve(g,p) {
 			if (mm == p->mm && p != tsk &&
 			    p->ptrace && p->parent->mm == mm) {
 				__ptrace_detach(p, 0);
 			}
-		} while_each_thread(g,p);
+		} while_each_thread_ve(g,p);
 		write_unlock_irq(&tasklist_lock);
 	}
 }
@@ -1479,7 +1522,7 @@ int do_coredump(long signr, int exit_cod
 	if (!binfmt || !binfmt->core_dump)
 		goto fail;
 	down_write(&mm->mmap_sem);
-	if (!mm->dumpable) {
+	if (!mm->dumpable || mm->vps_dumpable != 1) {
 		up_write(&mm->mmap_sem);
 		goto fail;
 	}
diff -upr linux-2.6.16.46-0.12.orig/fs/ext2/namei.c linux-2.6.16.46-0.12-027test011/fs/ext2/namei.c
--- linux-2.6.16.46-0.12.orig/fs/ext2/namei.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/ext2/namei.c	2007-08-28 17:35:33.000000000 +0400
@@ -31,6 +31,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -273,6 +274,8 @@ static int ext2_unlink(struct inode * di
 	struct page * page;
 	int err = -ENOENT;
 
+	DQUOT_INIT(inode);
+
 	de = ext2_find_entry (dir, dentry, &page);
 	if (!de)
 		goto out;
@@ -315,6 +318,9 @@ static int ext2_rename (struct inode * o
 	struct ext2_dir_entry_2 * old_de;
 	int err = -ENOENT;
 
+	if (new_inode)
+		DQUOT_INIT(new_inode);
+
 	old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
 	if (!old_de)
 		goto out;
diff -upr linux-2.6.16.46-0.12.orig/fs/ext2/super.c linux-2.6.16.46-0.12-027test011/fs/ext2/super.c
--- linux-2.6.16.46-0.12.orig/fs/ext2/super.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/ext2/super.c	2007-08-28 17:35:31.000000000 +0400
@@ -371,7 +371,6 @@ static int parse_options (char * options
 {
 	char * p;
 	substring_t args[MAX_OPT_ARGS];
-	unsigned long kind = EXT2_MOUNT_ERRORS_CONT;
 	int option;
 
 	if (!options)
@@ -411,13 +410,19 @@ static int parse_options (char * options
 			/* *sb_block = match_int(&args[0]); */
 			break;
 		case Opt_err_panic:
-			kind = EXT2_MOUNT_ERRORS_PANIC;
+			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt (sbi->s_mount_opt, ERRORS_RO);
+			set_opt (sbi->s_mount_opt, ERRORS_PANIC);
 			break;
 		case Opt_err_ro:
-			kind = EXT2_MOUNT_ERRORS_RO;
+			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt (sbi->s_mount_opt, ERRORS_RO);
 			break;
 		case Opt_err_cont:
-			kind = EXT2_MOUNT_ERRORS_CONT;
+			clear_opt (sbi->s_mount_opt, ERRORS_RO);
+			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt (sbi->s_mount_opt, ERRORS_CONT);
 			break;
 		case Opt_nouid32:
 			set_opt (sbi->s_mount_opt, NO_UID32);
@@ -496,7 +501,6 @@ static int parse_options (char * options
 			return 0;
 		}
 	}
-	sbi->s_mount_opt |= kind;
 	return 1;
 }
 
@@ -720,6 +724,8 @@ static int ext2_fill_super(struct super_
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO)
 		set_opt(sbi->s_mount_opt, ERRORS_RO);
+	else
+		set_opt(sbi->s_mount_opt, ERRORS_CONT);
 
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -1042,7 +1048,7 @@ static int ext2_remount (struct super_bl
 	es = sbi->s_es;
 	if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
 	    (old_mount_opt & EXT2_MOUNT_XIP)) &&
-	    invalidate_inodes(sb))
+	    invalidate_inodes(sb, 0))
 		ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\
 			     "xip remain in cache (no functional problem)");
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
@@ -1251,7 +1257,7 @@ static struct file_system_type ext2_fs_t
 	.name		= "ext2",
 	.get_sb		= ext2_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_VIRTUALIZED,
 };
 
 static int __init init_ext2_fs(void)
diff -upr linux-2.6.16.46-0.12.orig/fs/ext3/inode.c linux-2.6.16.46-0.12-027test011/fs/ext3/inode.c
--- linux-2.6.16.46-0.12.orig/fs/ext3/inode.c	2007-08-24 19:28:30.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/ext3/inode.c	2007-08-28 17:35:30.000000000 +0400
@@ -2524,8 +2524,10 @@ void ext3_read_inode(struct inode * inod
 		 */
 		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
 		if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-		    EXT3_INODE_SIZE(inode->i_sb))
+		    EXT3_INODE_SIZE(inode->i_sb)) {
+			brelse(bh);
 			goto bad_inode;
+		}
 		if (ei->i_extra_isize == 0) {
 			/* The extra space is currently unused. Use it. */
 			ei->i_extra_isize = sizeof(struct ext3_inode) -
diff -upr linux-2.6.16.46-0.12.orig/fs/ext3/namei.c linux-2.6.16.46-0.12-027test011/fs/ext3/namei.c
--- linux-2.6.16.46-0.12.orig/fs/ext3/namei.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/ext3/namei.c	2007-08-28 17:35:30.000000000 +0400
@@ -968,6 +968,7 @@ static struct buffer_head * ext3_dx_find
 				  (block<<EXT3_BLOCK_SIZE_BITS(sb))
 					  +((char *)de - bh->b_data))) {
 				brelse (bh);
+				*err = ERR_BAD_DX_DIR;
 				goto errout;
 			}
 			*res_dir = de;
@@ -1018,6 +1019,11 @@ static struct dentry *ext3_lookup(struct
 
 		if (!inode)
 			return ERR_PTR(-EACCES);
+
+		if (is_bad_inode(inode)) {
+			iput(inode);
+			return ERR_PTR(-ENOENT);	
+		}
 	}
 	return d_splice_alias(inode, dentry);
 }
@@ -1053,6 +1059,11 @@ struct dentry *ext3_get_parent(struct de
 	if (!inode)
 		return ERR_PTR(-EACCES);
 
+	if (is_bad_inode(inode)) {
+		iput(inode);
+		return ERR_PTR(-ENOENT);	
+	}
+
 	parent = d_alloc_anon(inode);
 	if (!parent) {
 		iput(inode);
@@ -1133,9 +1144,9 @@ static struct ext3_dir_entry_2 *do_split
 	char *data1 = (*bh)->b_data, *data2;
 	unsigned split;
 	struct ext3_dir_entry_2 *de = NULL, *de2;
-	int	err;
+	int	err = 0;
 
-	bh2 = ext3_append (handle, dir, &newblock, error);
+	bh2 = ext3_append (handle, dir, &newblock, &err);
 	if (!(bh2)) {
 		brelse(*bh);
 		*bh = NULL;
@@ -1144,14 +1155,9 @@ static struct ext3_dir_entry_2 *do_split
 
 	BUFFER_TRACE(*bh, "get_write_access");
 	err = ext3_journal_get_write_access(handle, *bh);
-	if (err) {
-	journal_error:
-		brelse(*bh);
-		brelse(bh2);
-		*bh = NULL;
-		ext3_std_error(dir->i_sb, err);
-		goto errout;
-	}
+	if (err)
+		goto journal_error;
+
 	BUFFER_TRACE(frame->bh, "get_write_access");
 	err = ext3_journal_get_write_access(handle, frame->bh);
 	if (err)
@@ -1194,8 +1200,16 @@ static struct ext3_dir_entry_2 *do_split
 		goto journal_error;
 	brelse (bh2);
 	dxtrace(dx_show_index ("frame", frame->entries));
-errout:
 	return de;
+
+journal_error:
+	brelse(*bh);
+	brelse(bh2);
+	*bh = NULL;
+	ext3_std_error(dir->i_sb, err);
+errout:
+	*error = err;
+	return NULL;
 }
 #endif
 
@@ -1292,7 +1306,7 @@ static int add_dirent_to_buf(handle_t *h
 	if (err)
 		ext3_std_error(dir->i_sb, err);
 	brelse(bh);
-	return 0;
+	return err;
 }
 
 #ifdef CONFIG_EXT3_INDEX
@@ -2192,6 +2206,12 @@ static int ext3_link (struct dentry * ol
 
 	if (inode->i_nlink >= EXT3_LINK_MAX)
 		return -EMLINK;
+	/*
+	 * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
+	 * otherwise has the potential to corrupt the orphan inode list.
+	 */
+	if (inode->i_nlink == 0)
+		return -ENOENT;
 
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
diff -upr linux-2.6.16.46-0.12.orig/fs/ext3/super.c linux-2.6.16.46-0.12-027test011/fs/ext3/super.c
--- linux-2.6.16.46-0.12.orig/fs/ext3/super.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/ext3/super.c	2007-08-28 17:35:31.000000000 +0400
@@ -160,20 +160,21 @@ static void ext3_handle_error(struct sup
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
-	if (test_opt (sb, ERRORS_RO)) {
-		printk (KERN_CRIT "Remounting filesystem read-only\n");
-		sb->s_flags |= MS_RDONLY;
-	} else {
+	if (!test_opt (sb, ERRORS_CONT)) {
 		journal_t *journal = EXT3_SB(sb)->s_journal;
 
 		EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
 		if (journal)
 			journal_abort(journal, -EIO);
 	}
-	if (test_opt(sb, ERRORS_PANIC))
-		panic("EXT3-fs (device %s): panic forced after error\n",
-			sb->s_id);
+	if (test_opt (sb, ERRORS_RO)) {
+		printk (KERN_CRIT "Remounting filesystem read-only\n");
+		sb->s_flags |= MS_RDONLY;
+	}
 	ext3_commit_super(sb, es, 1);
+	if (test_opt (sb, ERRORS_PANIC))
+		panic ("EXT3-fs (device %s): panic forced after error\n",
+				sb->s_id);
 }
 
 void ext3_error (struct super_block * sb, const char * function,
@@ -1464,6 +1465,8 @@ static int ext3_fill_super (struct super
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO)
 		set_opt(sbi->s_mount_opt, ERRORS_RO);
+	else
+		set_opt(sbi->s_mount_opt, ERRORS_CONT);
 
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -2708,7 +2711,7 @@ static struct file_system_type ext3_fs_t
 	.name		= "ext3",
 	.get_sb		= ext3_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_VIRTUALIZED,
 };
 
 static int __init init_ext3_fs(void)
diff -upr linux-2.6.16.46-0.12.orig/fs/ext3/xattr.c linux-2.6.16.46-0.12-027test011/fs/ext3/xattr.c
--- linux-2.6.16.46-0.12.orig/fs/ext3/xattr.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/ext3/xattr.c	2007-08-28 17:35:30.000000000 +0400
@@ -478,8 +478,15 @@ ext3_xattr_release_block(handle_t *handl
 			 struct buffer_head *bh)
 {
 	struct mb_cache_entry *ce = NULL;
+	int error = 0;
 
 	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
+	error = ext3_journal_get_write_access(handle, bh);
+	if (error)
+		 goto out;
+
+	lock_buffer(bh);
+
 	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
 		ea_bdebug(bh, "refcount now=0; freeing");
 		if (ce)
@@ -488,21 +495,21 @@ ext3_xattr_release_block(handle_t *handl
 		get_bh(bh);
 		ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
 	} else {
-		if (ext3_journal_get_write_access(handle, bh) == 0) {
-			lock_buffer(bh);
-			BHDR(bh)->h_refcount = cpu_to_le32(
+		BHDR(bh)->h_refcount = cpu_to_le32(
 				le32_to_cpu(BHDR(bh)->h_refcount) - 1);
-			ext3_journal_dirty_metadata(handle, bh);
-			if (IS_SYNC(inode))
-				handle->h_sync = 1;
-			DQUOT_FREE_BLOCK(inode, 1);
-			unlock_buffer(bh);
-			ea_bdebug(bh, "refcount now=%d; releasing",
-				  le32_to_cpu(BHDR(bh)->h_refcount));
-		}
+		error = ext3_journal_dirty_metadata(handle, bh);
+		if (IS_SYNC(inode))
+			handle->h_sync = 1;
+		DQUOT_FREE_BLOCK(inode, 1);
+		ea_bdebug(bh, "refcount now=%d; releasing",
+			  le32_to_cpu(BHDR(bh)->h_refcount));
 		if (ce)
 			mb_cache_entry_release(ce);
 	}
+	unlock_buffer(bh);
+out:
+	ext3_std_error(inode->i_sb, error);
+	return;
 }
 
 struct ext3_xattr_info {
@@ -678,7 +685,7 @@ ext3_xattr_block_set(handle_t *handle, s
 	struct buffer_head *new_bh = NULL;
 	struct ext3_xattr_search *s = &bs->s;
 	struct mb_cache_entry *ce = NULL;
-	int error;
+	int error = 0;
 
 #define header(x) ((struct ext3_xattr_header *)(x))
 
@@ -687,16 +694,17 @@ ext3_xattr_block_set(handle_t *handle, s
 	if (s->base) {
 		ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev,
 					bs->bh->b_blocknr);
+		error = ext3_journal_get_write_access(handle, bs->bh);
+		if (error)
+			goto cleanup;
+		lock_buffer(bs->bh);
+
 		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
 			if (ce) {
 				mb_cache_entry_free(ce);
 				ce = NULL;
 			}
 			ea_bdebug(bs->bh, "modifying in-place");
-			error = ext3_journal_get_write_access(handle, bs->bh);
-			if (error)
-				goto cleanup;
-			lock_buffer(bs->bh);
 			error = ext3_xattr_set_entry(i, s);
 			if (!error) {
 				if (!IS_LAST_ENTRY(s->first))
@@ -716,6 +724,9 @@ ext3_xattr_block_set(handle_t *handle, s
 		} else {
 			int offset = (char *)s->here - bs->bh->b_data;
 
+			unlock_buffer(bs->bh);
+			journal_release_buffer(handle, bs->bh);
+
 			if (ce) {
 				mb_cache_entry_release(ce);
 				ce = NULL;
diff -upr linux-2.6.16.46-0.12.orig/fs/fcntl.c linux-2.6.16.46-0.12-027test011/fs/fcntl.c
--- linux-2.6.16.46-0.12.orig/fs/fcntl.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/fcntl.c	2007-08-28 17:35:36.000000000 +0400
@@ -112,7 +112,7 @@ out:
 	return error;
 }
 
-static int dupfd(struct file *file, unsigned int start)
+int dupfd(struct file *file, unsigned int start)
 {
 	struct files_struct * files = current->files;
 	struct fdtable *fdt;
@@ -135,6 +135,8 @@ static int dupfd(struct file *file, unsi
 	return fd;
 }
 
+EXPORT_SYMBOL(dupfd);
+
 asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 {
 	int err = -EBADF;
@@ -190,6 +192,7 @@ out_fput:
 	fput(file);
 	goto out;
 }
+EXPORT_SYMBOL_GPL(sys_dup2);
 
 asmlinkage long sys_dup(unsigned int fildes)
 {
@@ -254,6 +257,7 @@ static int setfl(int fd, struct file * f
 static void f_modown(struct file *filp, unsigned long pid,
                      uid_t uid, uid_t euid, int force)
 {
+	pid = comb_vpid_to_pid(pid);
 	write_lock_irq(&filp->f_owner.lock);
 	if (force || !filp->f_owner.pid) {
 		filp->f_owner.pid = pid;
@@ -320,7 +324,7 @@ static long do_fcntl(int fd, unsigned in
 		 * current syscall conventions, the only way
 		 * to fix this will be in libc.
 		 */
-		err = filp->f_owner.pid;
+		err = comb_pid_to_vpid(filp->f_owner.pid);
 		force_successful_syscall_return();
 		break;
 	case F_SETOWN:
@@ -472,23 +476,29 @@ static void send_sigio_to_task(struct ta
 void send_sigio(struct fown_struct *fown, int fd, int band)
 {
 	struct task_struct *p;
+	struct file *f;
+	struct ve_struct *ve;
 	int pid;
 	
 	read_lock(&fown->lock);
 	pid = fown->pid;
 	if (!pid)
 		goto out_unlock_fown;
+
+	/* hack: fown's are always embedded in struct file */
+	f = container_of(fown, struct file, f_owner);
+	ve = f->owner_env;
 	
 	read_lock(&tasklist_lock);
 	if (pid > 0) {
-		p = find_task_by_pid(pid);
-		if (p) {
+		p = find_task_by_pid_all(pid);
+		if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) {
 			send_sigio_to_task(p, fown, fd, band);
 		}
 	} else {
-		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
+		__do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) {
 			send_sigio_to_task(p, fown, fd, band);
-		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
+		} __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve);
 	}
 	read_unlock(&tasklist_lock);
  out_unlock_fown:
@@ -505,6 +515,8 @@ static void send_sigurg_to_task(struct t
 int send_sigurg(struct fown_struct *fown)
 {
 	struct task_struct *p;
+	struct file *f;
+	struct ve_struct *ve;
 	int pid, ret = 0;
 	
 	read_lock(&fown->lock);
@@ -513,17 +525,19 @@ int send_sigurg(struct fown_struct *fown
 		goto out_unlock_fown;
 
 	ret = 1;
+	f = container_of(fown, struct file, f_owner);
+	ve = f->owner_env;
 	
 	read_lock(&tasklist_lock);
 	if (pid > 0) {
-		p = find_task_by_pid(pid);
-		if (p) {
+		p = find_task_by_pid_all(pid);
+		if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) {
 			send_sigurg_to_task(p, fown);
 		}
 	} else {
-		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
+		__do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) {
 			send_sigurg_to_task(p, fown);
-		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
+		} __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve);
 	}
 	read_unlock(&tasklist_lock);
  out_unlock_fown:
diff -upr linux-2.6.16.46-0.12.orig/fs/file.c linux-2.6.16.46-0.12-027test011/fs/file.c
--- linux-2.6.16.46-0.12.orig/fs/file.c	2007-08-24 19:28:08.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/file.c	2007-08-28 17:35:33.000000000 +0400
@@ -8,6 +8,7 @@
 
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/time.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -18,6 +19,8 @@
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
 
+#include <ub/ub_mem.h>
+
 struct fdtable_defer {
 	spinlock_t lock;
 	struct work_struct wq;
@@ -44,9 +47,9 @@ struct file ** alloc_fd_array(int num)
 	int size = num * sizeof(struct file *);
 
 	if (size <= PAGE_SIZE)
-		new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
+		new_fds = (struct file **) ub_kmalloc(size, GFP_KERNEL);
 	else 
-		new_fds = (struct file **) vmalloc(size);
+		new_fds = (struct file **) ub_vmalloc(size);
 	return new_fds;
 }
 
@@ -212,9 +215,9 @@ fd_set * alloc_fdset(int num)
 	int size = num / 8;
 
 	if (size <= PAGE_SIZE)
-		new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
+		new_fdset = (fd_set *) ub_kmalloc(size, GFP_KERNEL);
 	else
-		new_fdset = (fd_set *) vmalloc(size);
+		new_fdset = (fd_set *) ub_vmalloc(size);
 	return new_fdset;
 }
 
@@ -304,7 +307,7 @@ out:
  * both fd array and fdset. It is expected to be called with the
  * files_lock held.
  */
-static int expand_fdtable(struct files_struct *files, int nr)
+int expand_fdtable(struct files_struct *files, int nr)
 	__releases(files->file_lock)
 	__acquires(files->file_lock)
 {
@@ -340,6 +343,7 @@ static int expand_fdtable(struct files_s
 out:
 	return error;
 }
+EXPORT_SYMBOL_GPL(expand_fdtable);
 
 /*
  * Expand files.
diff -upr linux-2.6.16.46-0.12.orig/fs/file_table.c linux-2.6.16.46-0.12-027test011/fs/file_table.c
--- linux-2.6.16.46-0.12.orig/fs/file_table.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/file_table.c	2007-08-28 17:35:32.000000000 +0400
@@ -25,6 +25,10 @@
 
 #include <asm/atomic.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/ub_misc.h>
+
 /* sysctl tunables... */
 struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
@@ -37,7 +41,10 @@ static struct percpu_counter nr_files __
 
 static inline void file_free(struct file *f)
 {
-	percpu_counter_dec(&nr_files);
+	if (f->f_ub == get_ub0())
+		percpu_counter_dec(&nr_files);
+	ub_file_uncharge(f);
+	put_ve(f->owner_env);
 	kmem_cache_free(filp_cachep, f);
 }
 
@@ -84,11 +91,14 @@ struct file *get_empty_filp(void)
 {
 	static int old_max;
 	struct file * f;
+	int acct;
 
+	acct = (get_exec_ub() == get_ub0());
 	/*
 	 * Privileged users can go above max_files
 	 */
-	if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
+	if (acct && get_nr_files() >= files_stat.max_files &&
+			!capable(CAP_SYS_ADMIN)) {
 		/*
 		 * percpu_counters are inaccurate.  Do an expensive check before
 		 * we go and fail.
@@ -101,8 +111,15 @@ struct file *get_empty_filp(void)
 	if (f == NULL)
 		goto fail;
 
-	percpu_counter_inc(&nr_files);
 	memset(f, 0, sizeof(*f));
+
+	if (ub_file_charge(f))
+		goto fail_ch;
+
+	f->owner_env = get_ve(get_exec_env());
+	if (acct)
+		percpu_counter_inc(&nr_files);
+
 	if (security_file_alloc(f))
 		goto fail_sec;
 
@@ -128,6 +145,10 @@ fail_sec:
 	file_free(f);
 fail:
 	return NULL;
+
+fail_ch:
+	kmem_cache_free(filp_cachep, f);
+	return NULL;
 }
 
 EXPORT_SYMBOL(get_empty_filp);
diff -upr linux-2.6.16.46-0.12.orig/fs/filesystems.c linux-2.6.16.46-0.12-027test011/fs/filesystems.c
--- linux-2.6.16.46-0.12.orig/fs/filesystems.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/filesystems.c	2007-08-28 17:35:31.000000000 +0400
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sched.h>	/* for 'current' */
+#include <linux/mount.h>
 #include <asm/uaccess.h>
 
 /*
@@ -22,8 +23,8 @@
  *	During the unload module must call unregister_filesystem().
  *	We can access the fields of list element if:
  *		1) spinlock is held or
- *		2) we hold the reference to the module.
- *	The latter can be guaranteed by call of try_module_get(); if it
+ *		2) we hold the reference to the element.
+ *	The latter can be guaranteed by call of try_get_filesystem(); if it
  *	returned 0 we must skip the element, otherwise we got the reference.
  *	Once the reference is obtained we can drop the spinlock.
  */
@@ -31,23 +32,45 @@
 static struct file_system_type *file_systems;
 static DEFINE_RWLOCK(file_systems_lock);
 
+int try_get_filesystem(struct file_system_type *fs)
+{
+	/* Take the VE reference only once the module reference is held. */
+	if (!try_module_get(fs->owner))
+		return 0;
+	get_ve(fs->owner_env);
+	return 1;
+}
+
 /* WARNING: This can be used only if we _already_ own a reference */
 void get_filesystem(struct file_system_type *fs)
 {
+	get_ve(fs->owner_env);
 	__module_get(fs->owner);
 }
 
 void put_filesystem(struct file_system_type *fs)
 {
 	module_put(fs->owner);
+	put_ve(fs->owner_env);
+}
+
+static inline int check_ve_fstype(struct file_system_type *p,
+		struct ve_struct *env)
+{
+	return ((p->fs_flags & FS_VIRTUALIZED) ||
+			ve_accessible_strict(p->owner_env, env));
 }
 
-static struct file_system_type **find_filesystem(const char *name)
+static struct file_system_type **find_filesystem(const char *name,
+		struct ve_struct *env)
 {
 	struct file_system_type **p;
-	for (p=&file_systems; *p; p=&(*p)->next)
+	for (p=&file_systems; *p; p=&(*p)->next) {
+		if (!check_ve_fstype(*p, env))
+			continue;
 		if (strcmp((*p)->name,name) == 0)
 			break;
+	}
 	return p;
 }
 
@@ -74,8 +97,10 @@ int register_filesystem(struct file_syst
 	if (fs->next)
 		return -EBUSY;
 	INIT_LIST_HEAD(&fs->fs_supers);
+	if (fs->owner_env == NULL)
+		fs->owner_env = get_ve0();
 	write_lock(&file_systems_lock);
-	p = find_filesystem(fs->name);
+	p = find_filesystem(fs->name, fs->owner_env);
 	if (*p)
 		res = -EBUSY;
 	else
@@ -119,6 +144,74 @@ int unregister_filesystem(struct file_sy
 
 EXPORT_SYMBOL(unregister_filesystem);
 
+#ifdef CONFIG_VE
+int register_ve_fs_type(struct ve_struct *ve, struct file_system_type *template,
+		struct file_system_type **p_fs_type, struct vfsmount **p_mnt)
+{
+	struct vfsmount *mnt;
+	struct file_system_type *local_fs_type;
+	int ret;
+
+	local_fs_type = kzalloc(sizeof(*local_fs_type) + sizeof(void *),
+					GFP_KERNEL);
+	if (local_fs_type == NULL)
+		return -ENOMEM;
+
+	local_fs_type->name = template->name;
+	local_fs_type->fs_flags = template->fs_flags;
+	local_fs_type->get_sb = template->get_sb;
+	local_fs_type->kill_sb = template->kill_sb;
+	local_fs_type->owner = template->owner;
+	local_fs_type->owner_env = ve;
+
+	get_filesystem(local_fs_type);	/* get_ve() inside */
+
+	ret = register_filesystem(local_fs_type);
+	if (ret)
+		goto reg_err;
+
+	if (p_mnt == NULL) 
+		goto done; 
+
+	mnt = kern_mount(local_fs_type);
+	if (IS_ERR(mnt))
+		goto mnt_err;
+
+	*p_mnt = mnt;
+done:
+	*p_fs_type = local_fs_type;
+	return 0;
+
+mnt_err:
+	ret = PTR_ERR(mnt);
+	unregister_filesystem(local_fs_type); /* does not put */
+
+reg_err:
+	put_filesystem(local_fs_type);
+	kfree(local_fs_type);
+	printk(KERN_DEBUG
+	       "register_ve_fs_type(\"%s\") err=%d\n", template->name, ret);
+	return ret;
+}
+
+EXPORT_SYMBOL(register_ve_fs_type);
+
+void unregister_ve_fs_type(struct file_system_type *local_fs_type,
+		struct vfsmount *local_fs_mount)
+{
+	if (local_fs_mount == NULL && local_fs_type == NULL)
+		return;
+
+	unregister_filesystem(local_fs_type);
+	umount_ve_fs_type(local_fs_type);
+	if (local_fs_mount)
+		kern_umount(local_fs_mount); /* alias to mntput, drop our ref */
+	put_filesystem(local_fs_type);
+}
+
+EXPORT_SYMBOL(unregister_ve_fs_type);
+#endif
+
 static int fs_index(const char __user * __name)
 {
 	struct file_system_type * tmp;
@@ -132,11 +225,14 @@ static int fs_index(const char __user * 
 
 	err = -EINVAL;
 	read_lock(&file_systems_lock);
-	for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
+	for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next) {
+		if (!check_ve_fstype(tmp, get_exec_env()))
+			continue;
 		if (strcmp(tmp->name,name) == 0) {
 			err = index;
 			break;
 		}
+		index++;
 	}
 	read_unlock(&file_systems_lock);
 	putname(name);
@@ -149,9 +245,15 @@ static int fs_name(unsigned int index, c
 	int len, res;
 
 	read_lock(&file_systems_lock);
-	for (tmp = file_systems; tmp; tmp = tmp->next, index--)
-		if (index <= 0 && try_module_get(tmp->owner))
-			break;
+	for (tmp = file_systems; tmp; tmp = tmp->next) {
+		if (!check_ve_fstype(tmp, get_exec_env()))
+			continue;
+		if (!index) {
+			if (try_get_filesystem(tmp))
+				break;
+		} else
+			index--;
+	}
 	read_unlock(&file_systems_lock);
 	if (!tmp)
 		return -EINVAL;
@@ -169,8 +271,9 @@ static int fs_maxindex(void)
 	int index;
 
 	read_lock(&file_systems_lock);
-	for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
-		;
+	for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next)
+		if (check_ve_fstype(tmp, get_exec_env()))
+			index++;
 	read_unlock(&file_systems_lock);
 	return index;
 }
@@ -206,9 +309,10 @@ int get_filesystem_list(char * buf)
 	read_lock(&file_systems_lock);
 	tmp = file_systems;
 	while (tmp && len < PAGE_SIZE - 80) {
-		len += sprintf(buf+len, "%s\t%s\n",
-			(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
-			tmp->name);
+		if (check_ve_fstype(tmp, get_exec_env()))
+			len += sprintf(buf+len, "%s\t%s\n",
+				(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
+				tmp->name);
 		tmp = tmp->next;
 	}
 	read_unlock(&file_systems_lock);
@@ -220,14 +324,14 @@ struct file_system_type *get_fs_type(con
 	struct file_system_type *fs;
 
 	read_lock(&file_systems_lock);
-	fs = *(find_filesystem(name));
-	if (fs && !try_module_get(fs->owner))
+	fs = *(find_filesystem(name, get_exec_env()));
+	if (fs && !try_get_filesystem(fs))
 		fs = NULL;
 	read_unlock(&file_systems_lock);
 	if (!fs && (request_module("%s", name) == 0)) {
 		read_lock(&file_systems_lock);
-		fs = *(find_filesystem(name));
-		if (fs && !try_module_get(fs->owner))
+		fs = *(find_filesystem(name, get_exec_env()));
+		if (fs && !try_get_filesystem(fs))
 			fs = NULL;
 		read_unlock(&file_systems_lock);
 	}
@@ -235,3 +339,5 @@ struct file_system_type *get_fs_type(con
 }
 
 EXPORT_SYMBOL(get_fs_type);
+EXPORT_SYMBOL(get_filesystem);
+EXPORT_SYMBOL(put_filesystem);
diff -upr linux-2.6.16.46-0.12.orig/fs/fuse/dir.c linux-2.6.16.46-0.12-027test011/fs/fuse/dir.c
--- linux-2.6.16.46-0.12.orig/fs/fuse/dir.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/fuse/dir.c	2007-08-28 17:35:36.000000000 +0400
@@ -108,6 +108,8 @@ static int fuse_dentry_revalidate(struct
 		struct fuse_entry_out outarg;
 		struct fuse_conn *fc;
 		struct fuse_req *req;
+		struct fuse_req *forget_req;
+		struct dentry *parent;
 
 		/* Doesn't hurt to "reset" the validity timeout */
 		fuse_invalidate_entry_cache(entry);
@@ -121,21 +123,33 @@ static int fuse_dentry_revalidate(struct
 		if (!req)
 			return 0;
 
-		fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
+		forget_req = fuse_get_request(fc);
+		if (!forget_req) {	/* NULL, not ERR_PTR, on failure */
+			fuse_put_request(fc, req);
+			return 0;
+		}
+
+		parent = dget_parent(entry);
+		fuse_lookup_init(req, parent->d_inode, entry, &outarg);
 		request_send(fc, req);
+		dput(parent);
 		err = req->out.h.error;
+		fuse_put_request(fc, req);
 		/* Zero nodeid is same as -ENOENT */
 		if (!err && !outarg.nodeid)
 			err = -ENOENT;
 		if (!err) {
 			struct fuse_inode *fi = get_fuse_inode(inode);
 			if (outarg.nodeid != get_node_id(inode)) {
-				fuse_send_forget(fc, req, outarg.nodeid, 1);
+				fuse_send_forget(fc, forget_req,
+						 outarg.nodeid, 1);
 				return 0;
 			}
+			spin_lock(&fuse_lock);
 			fi->nlookup ++;
+			spin_unlock(&fuse_lock);
 		}
-		fuse_put_request(fc, req);
+		fuse_put_request(fc, forget_req);
 		if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
 			return 0;
 
@@ -170,7 +184,7 @@ static struct dentry_operations fuse_den
 	.d_revalidate	= fuse_dentry_revalidate,
 };
 
-static int valid_mode(int m)
+int fuse_valid_type(int m)
 {
 	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
 		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
@@ -184,6 +198,7 @@ static struct dentry *fuse_lookup(struct
 	struct inode *inode = NULL;
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	struct fuse_req *req;
+	struct fuse_req *forget_req;
 
 	if (entry->d_name.len > FUSE_NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
@@ -192,22 +207,30 @@ static struct dentry *fuse_lookup(struct
 	if (!req)
 		return ERR_PTR(-EINTR);
 
+	forget_req = fuse_get_request(fc);
+	if (!forget_req) {	/* NULL, not ERR_PTR, on failure */
+		fuse_put_request(fc, req);
+		return ERR_PTR(-EINTR);
+	}
+
 	fuse_lookup_init(req, dir, entry, &outarg);
 	request_send(fc, req);
 	err = req->out.h.error;
+	fuse_put_request(fc, req);
 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
 	if (!err && outarg.nodeid &&
-	    (invalid_nodeid(outarg.nodeid) || !valid_mode(outarg.attr.mode)))
+	    (invalid_nodeid(outarg.nodeid) ||
+	     !fuse_valid_type(outarg.attr.mode)))
 		err = -EIO;
 	if (!err && outarg.nodeid) {
 		inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
 				  &outarg.attr);
 		if (!inode) {
-			fuse_send_forget(fc, req, outarg.nodeid, 1);
+			fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
 			return ERR_PTR(-ENOMEM);
 		}
 	}
-	fuse_put_request(fc, req);
+	fuse_put_request(fc, forget_req);
 	if (err && err != -ENOENT)
 		return ERR_PTR(err);
 
@@ -328,6 +351,13 @@ static int create_new_entry(struct fuse_
 	struct fuse_entry_out outarg;
 	struct inode *inode;
 	int err;
+	struct fuse_req *forget_req;
+
+	forget_req = fuse_get_request(fc);
+	if (!forget_req) {	/* NULL, not ERR_PTR, on failure */
+		fuse_put_request(fc, req);
+		return -EINTR;
+	}
 
 	req->in.h.nodeid = get_node_id(dir);
 	req->inode = dir;
@@ -336,24 +366,24 @@ static int create_new_entry(struct fuse_
 	req->out.args[0].value = &outarg;
 	request_send(fc, req);
 	err = req->out.h.error;
-	if (err) {
-		fuse_put_request(fc, req);
-		return err;
-	}
+	fuse_put_request(fc, req);
+	if (err)
+		goto out_put_forget_req;
+
 	err = -EIO;
 	if (invalid_nodeid(outarg.nodeid))
-		goto out_put_request;
+		goto out_put_forget_req;
 
 	if ((outarg.attr.mode ^ mode) & S_IFMT)
-		goto out_put_request;
+		goto out_put_forget_req;
 
 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
 			  &outarg.attr);
 	if (!inode) {
-		fuse_send_forget(fc, req, outarg.nodeid, 1);
+		fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
 		return -ENOMEM;
 	}
-	fuse_put_request(fc, req);
+	fuse_put_request(fc, forget_req);
 
 	if (dir_alias(inode)) {
 		iput(inode);
@@ -365,8 +395,8 @@ static int create_new_entry(struct fuse_
 	fuse_invalidate_attr(dir);
 	return 0;
 
- out_put_request:
-	fuse_put_request(fc, req);
+ out_put_forget_req:
+	fuse_put_request(fc, forget_req);
 	return err;
 }
 
@@ -394,7 +424,7 @@ static int fuse_mknod(struct inode *dir,
 static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
 		       struct nameidata *nd)
 {
-	if (nd && (nd->flags & LOOKUP_CREATE)) {
+	if (nd && (nd->flags & LOOKUP_OPEN)) {
 		int err = fuse_create_open(dir, entry, mode, nd);
 		if (err != -ENOSYS)
 			return err;
diff -upr linux-2.6.16.46-0.12.orig/fs/fuse/fuse_i.h linux-2.6.16.46-0.12-027test011/fs/fuse/fuse_i.h
--- linux-2.6.16.46-0.12.orig/fs/fuse/fuse_i.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/fuse/fuse_i.h	2007-08-28 17:35:30.000000000 +0400
@@ -501,3 +501,8 @@ int fuse_do_getattr(struct inode *inode)
  * Invalidate inode attributes
  */
 void fuse_invalidate_attr(struct inode *inode);
+
+/**
+ * Is file type valid?
+ */
+int fuse_valid_type(int m);
diff -upr linux-2.6.16.46-0.12.orig/fs/fuse/inode.c linux-2.6.16.46-0.12-027test011/fs/fuse/inode.c
--- linux-2.6.16.46-0.12.orig/fs/fuse/inode.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/fuse/inode.c	2007-08-28 17:35:36.000000000 +0400
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/parser.h>
 #include <linux/statfs.h>
+#include <linux/ve_proto.h>
 
 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -170,7 +171,6 @@ struct inode *fuse_iget(struct super_blo
 	struct inode *inode;
 	struct fuse_inode *fi;
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
-	int retried = 0;
 
  retry:
 	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
@@ -184,16 +184,16 @@ struct inode *fuse_iget(struct super_blo
 		fuse_init_inode(inode, attr);
 		unlock_new_inode(inode);
 	} else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
-		BUG_ON(retried);
 		/* Inode has changed type, any I/O on the old should fail */
 		make_bad_inode(inode);
 		iput(inode);
-		retried = 1;
 		goto retry;
 	}
 
 	fi = get_fuse_inode(inode);
+	spin_lock(&fuse_lock);
 	fi->nlookup ++;
+	spin_unlock(&fuse_lock);
 	fuse_change_attributes(inode, attr);
 	return inode;
 }
@@ -310,6 +310,8 @@ static int parse_fuse_opt(char *opt, str
 		case OPT_ROOTMODE:
 			if (match_octal(&args[0], &value))
 				return 0;
+			if (!fuse_valid_type(value))
+				return 0;
 			d->rootmode = value;
 			d->rootmode_present = 1;
 			break;
@@ -748,6 +750,41 @@ static void fuse_sysfs_cleanup(void)
 	subsystem_unregister(&fuse_subsys);
 }
 
+#ifdef CONFIG_VE
+/* VE init hook: register this VE's fuse fs type; -EBUSY if already set. */
+static int fuse_start(void *data)
+{
+	struct ve_struct *env = data;
+
+	if (env->fuse_fs_type)
+		return -EBUSY;
+
+	INIT_LIST_HEAD(&env->_fuse_conn_list);
+	return register_ve_fs_type(env, &fuse_fs_type, &env->fuse_fs_type, NULL);
+}
+
+/* VE fini hook: unregister and free this VE's fuse fs type, if any. */
+static void fuse_stop(void *data)
+{
+	struct ve_struct *env = data;
+
+	if (!env->fuse_fs_type)
+		return;
+
+	unregister_ve_fs_type(env->fuse_fs_type, NULL);
+	kfree(env->fuse_fs_type);
+	env->fuse_fs_type = NULL;
+	BUG_ON(!list_empty(&env->_fuse_conn_list));
+}
+
+static struct ve_hook fuse_ve_hook = {
+	.init		= fuse_start,
+	.fini		= fuse_stop,
+	.owner		= THIS_MODULE,
+	.priority	= HOOK_PRIO_FS,
+};
+#endif
+
 static int __init fuse_init(void)
 {
 	int res;
@@ -768,6 +805,7 @@ static int __init fuse_init(void)
 	if (res)
 		goto err_dev_cleanup;
 
+	ve_hook_register(VE_SS_CHAIN, &fuse_ve_hook);
 	return 0;
 
  err_dev_cleanup:
@@ -782,6 +820,7 @@ static void __exit fuse_exit(void)
 {
 	printk(KERN_DEBUG "fuse exit\n");
 
+	ve_hook_unregister(&fuse_ve_hook);
 	fuse_sysfs_cleanup();
 	fuse_fs_cleanup();
 	fuse_dev_cleanup();
diff -upr linux-2.6.16.46-0.12.orig/fs/hugetlbfs/inode.c linux-2.6.16.46-0.12-027test011/fs/hugetlbfs/inode.c
--- linux-2.6.16.46-0.12.orig/fs/hugetlbfs/inode.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/hugetlbfs/inode.c	2007-08-28 17:35:32.000000000 +0400
@@ -764,7 +764,7 @@ struct file *hugetlb_zero_setup(size_t s
 	struct inode *inode;
 	struct dentry *dentry, *root;
 	struct qstr quick_string;
-	char buf[16];
+	char buf[64];
 
 	if (!can_do_hugetlb_shm())
 		return ERR_PTR(-EPERM);
@@ -773,7 +773,8 @@ struct file *hugetlb_zero_setup(size_t s
 		return ERR_PTR(-ENOMEM);
 
 	root = hugetlbfs_vfsmount->mnt_root;
-	snprintf(buf, 16, "%lu", hugetlbfs_counter());
+	snprintf(buf, sizeof(buf), "VE%d-%lu",
+			VEID(get_exec_env()), hugetlbfs_counter());
 	quick_string.name = buf;
 	quick_string.len = strlen(quick_string.name);
 	quick_string.hash = 0;
diff -upr linux-2.6.16.46-0.12.orig/fs/inode.c linux-2.6.16.46-0.12-027test011/fs/inode.c
--- linux-2.6.16.46-0.12.orig/fs/inode.c	2007-08-24 19:28:30.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/inode.c	2007-08-28 17:35:33.000000000 +0400
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/dcache.h>
 #include <linux/init.h>
+#include <linux/kernel_stat.h>
 #include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/writeback.h>
@@ -98,13 +99,15 @@ DECLARE_MUTEX(iprune_sem);
  */
 struct inodes_stat_t inodes_stat;
 
-static kmem_cache_t * inode_cachep;
+kmem_cache_t *inode_cachep;
+
+static struct address_space_operations vfs_empty_aops;
+struct inode_operations vfs_empty_iops;
+static struct file_operations vfs_empty_fops;
+EXPORT_SYMBOL(vfs_empty_iops);
 
 static struct inode *alloc_inode(struct super_block *sb)
 {
-	static struct address_space_operations empty_aops;
-	static struct inode_operations empty_iops;
-	static struct file_operations empty_fops;
 	struct inode *inode;
 
 	if (sb->s_op->alloc_inode)
@@ -119,8 +122,8 @@ static struct inode *alloc_inode(struct 
 		inode->i_blkbits = sb->s_blocksize_bits;
 		inode->i_flags = 0;
 		atomic_set(&inode->i_count, 1);
-		inode->i_op = &empty_iops;
-		inode->i_fop = &empty_fops;
+		inode->i_op = &vfs_empty_iops;
+		inode->i_fop = &vfs_empty_fops;
 		inode->i_nlink = 1;
 		atomic_set(&inode->i_writecount, 0);
 		inode->i_size = 0;
@@ -144,7 +147,7 @@ static struct inode *alloc_inode(struct 
 			return NULL;
 		}
 
-		mapping->a_ops = &empty_aops;
+		mapping->a_ops = &vfs_empty_aops;
  		mapping->host = inode;
 		mapping->flags = 0;
 		mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
@@ -303,13 +306,57 @@ static void dispose_list(struct list_hea
 	spin_unlock(&inode_lock);
 }
 
+static void show_header(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	printk("VFS: Busy inodes after unmount. "
+			"sb = %p, fs type = %s, sb count = %d, "
+			"sb->s_root = %s\n", sb,
+			(sb->s_type != NULL) ? sb->s_type->name : "",
+			sb->s_count,
+			(sb->s_root != NULL) ?
+			(char *)sb->s_root->d_name.name : "");
+}
+
+static void show_inode(struct inode *inode)
+{
+	struct dentry *d;
+	int i;
+
+	printk("inode = %p, inode->i_count = %d, "
+			"inode->i_nlink = %d, "
+			"inode->i_mode = %d, "
+			"inode->i_state = %ld, "
+			"inode->i_flags = %d, "
+			"inode->i_devices.next = %p, "
+			"inode->i_devices.prev = %p, "
+			"inode->i_ino = %ld\n",
+			inode,
+			atomic_read(&inode->i_count),
+			inode->i_nlink,
+			inode->i_mode,
+			inode->i_state,
+			inode->i_flags,
+			inode->i_devices.next,
+			inode->i_devices.prev,
+			inode->i_ino);
+	printk("inode dump: ");
+	for (i = 0; i < sizeof(*inode); i++)
+		printk("%2.2x ", *((u_char *)inode + i));
+	printk("\n");
+	list_for_each_entry(d, &inode->i_dentry, d_alias)
+		printk("  d_alias %s\n",
+				d->d_name.name);
+}
+
 /*
  * Invalidate all inodes for a device.
  */
-static int invalidate_list(struct list_head *head, struct list_head *dispose)
+static int invalidate_list(struct list_head *head, struct list_head *dispose, int check)
 {
 	struct list_head *next;
-	int busy = 0, count = 0;
+	int busy = 0, count = 0, once = 1;
 
 	next = head->next;
 	for (;;) {
@@ -336,6 +383,14 @@ static int invalidate_list(struct list_h
 			continue;
 		}
 		busy = 1;
+
+		if (check) {
+			if (once) {
+				once = 0;
+				show_header(inode);
+			}
+			show_inode(inode);
+		}
 	}
 	/* only unused inodes may be cached with i_count zero */
 	inodes_stat.nr_unused -= count;
@@ -350,7 +405,7 @@ static int invalidate_list(struct list_h
  *	fails because there are busy inodes then a non zero value is returned.
  *	If the discard is successful all the inodes have been discarded.
  */
-int invalidate_inodes(struct super_block * sb)
+int invalidate_inodes(struct super_block * sb, int check)
 {
 	int busy;
 	LIST_HEAD(throw_away);
@@ -358,7 +413,7 @@ int invalidate_inodes(struct super_block
 	down(&iprune_sem);
 	spin_lock(&inode_lock);
 	inotify_unmount_inodes(&sb->s_inodes);
-	busy = invalidate_list(&sb->s_inodes, &throw_away);
+	busy = invalidate_list(&sb->s_inodes, &throw_away, check);
 	spin_unlock(&inode_lock);
 
 	dispose_list(&throw_away);
@@ -382,7 +437,7 @@ int __invalidate_device(struct block_dev
 		 * hold).
 		 */
 		shrink_dcache_sb(sb);
-		res = invalidate_inodes(sb);
+		res = invalidate_inodes(sb, 0);
 		drop_super(sb);
 	}
 	invalidate_bdev(bdev, 0);
@@ -478,6 +533,7 @@ static void prune_icache(int nr_to_scan)
  */
 static int shrink_icache_memory(int nr, gfp_t gfp_mask)
 {
+	KSTAT_PERF_ENTER(shrink_icache)
 	if (nr) {
 		/*
 		 * Nasty deadlock avoidance.  We may hold various FS locks,
@@ -488,6 +544,7 @@ static int shrink_icache_memory(int nr, 
 			return -1;
 		prune_icache(nr);
 	}
+	KSTAT_PERF_LEAVE(shrink_icache)
 	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
 
@@ -726,7 +783,8 @@ static inline unsigned long hash(struct 
  */
 ino_t iunique(struct super_block *sb, ino_t max_reserved)
 {
-	static ino_t counter;
+	/* 32 bits for compatibility mode stat calls */
+	static unsigned int counter;
 	struct inode *inode;
 	struct hlist_head * head;
 	ino_t res;
@@ -806,7 +864,7 @@ EXPORT_SYMBOL(iunique_unregister);
 struct inode *igrab(struct inode *inode)
 {
 	spin_lock(&inode_lock);
-	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
+	if (inode && !(inode->i_state & (I_FREEING|I_WILL_FREE)))
 		__iget(inode);
 	else
 		/*
diff -upr linux-2.6.16.46-0.12.orig/fs/inotify.c linux-2.6.16.46-0.12-027test011/fs/inotify.c
--- linux-2.6.16.46-0.12.orig/fs/inotify.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/inotify.c	2007-08-28 17:35:33.000000000 +0400
@@ -31,6 +31,7 @@
 #include <linux/list.h>
 #include <linux/writeback.h>
 #include <linux/inotify.h>
+#include <linux/mount.h>
 
 static atomic_t inotify_cookie;
 
@@ -68,19 +69,6 @@ static atomic_t inotify_cookie;
  * inotify_add_watch() to the final put_inotify_watch().
  */
 
-/*
- * struct inotify_handle - represents an inotify instance
- *
- * This structure is protected by the mutex 'mutex'.
- */
-struct inotify_handle {
-	struct idr		idr;		/* idr mapping wd -> watch */
-	struct mutex		mutex;		/* protects this bad boy */
-	struct list_head	watches;	/* list of watches */
-	atomic_t		count;		/* reference count */
-	u32			last_wd;	/* the last wd allocated */
-	const struct inotify_operations *in_ops; /* inotify caller operations */
-};
 
 static inline void get_inotify_handle(struct inotify_handle *ih)
 {
@@ -117,6 +105,10 @@ void put_inotify_watch(struct inotify_wa
 		struct inotify_handle *ih = watch->ih;
 
 		iput(watch->inode);
+		dput(watch->dentry);
+		mntput(watch->mnt);
+		watch->dentry = NULL;
+		watch->mnt = NULL;
 		ih->in_ops->destroy_watch(watch);
 		put_inotify_handle(ih);
 	}
@@ -482,6 +474,8 @@ void inotify_init_watch(struct inotify_w
 	INIT_LIST_HEAD(&watch->i_list);
 	atomic_set(&watch->count, 0);
 	get_inotify_watch(watch); /* initial get */
+	watch->dentry = NULL;
+	watch->mnt = NULL;
 }
 EXPORT_SYMBOL_GPL(inotify_init_watch);
 
@@ -622,8 +616,10 @@ EXPORT_SYMBOL_GPL(inotify_find_update_wa
  * Caller must ensure it only calls inotify_add_watch() once per watch.
  * Calls inotify_handle_get_wd() so may sleep.
  */
-s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
-		      struct inode *inode, u32 mask)
+s32 __inotify_add_watch(struct inotify_handle *ih,
+		        struct inotify_watch *watch,
+			struct dentry *d, struct vfsmount *mnt,
+			struct inode * inode, u32 mask)
 {
 	int ret = 0;
 
@@ -650,6 +646,10 @@ s32 inotify_add_watch(struct inotify_han
 	 * Save a reference to the inode and bump the ref count to make it
 	 * official.  We hold a reference to nameidata, which makes this safe.
 	 */
+	if (d) {
+		watch->dentry = dget(d);
+		watch->mnt = mntget(mnt);
+	}
 	watch->inode = igrab(inode);
 
 	if (!inotify_inode_watched(inode))
@@ -665,6 +665,19 @@ out:
 }
 EXPORT_SYMBOL_GPL(inotify_add_watch);
 
+s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
+		      struct inode *inode, u32 mask)
+{
+	return __inotify_add_watch(ih, watch, NULL, NULL, inode, mask);
+}
+
+s32 inotify_add_watch_dget(struct inotify_handle *ih,
+			   struct inotify_watch *watch, struct dentry *d,
+			   struct vfsmount *mnt, u32 mask)
+{
+	return __inotify_add_watch(ih, watch, d, mnt, d->d_inode, mask);
+}
+
 /**
  * inotify_rm_wd - remove a watch from an inotify instance
  * @ih: inotify handle
diff -upr linux-2.6.16.46-0.12.orig/fs/inotify_user.c linux-2.6.16.46-0.12-027test011/fs/inotify_user.c
--- linux-2.6.16.46-0.12.orig/fs/inotify_user.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/inotify_user.c	2007-08-28 17:35:33.000000000 +0400
@@ -20,6 +20,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
@@ -65,46 +66,6 @@ int inotify_max_queued_events __read_mos
  * first event, or to inotify_destroy().
  */
 
-/*
- * struct inotify_device - represents an inotify instance
- *
- * This structure is protected by the mutex 'mutex'.
- */
-struct inotify_device {
-	wait_queue_head_t 	wq;		/* wait queue for i/o */
-	struct mutex		ev_mutex;	/* protects event queue */
-	struct mutex		up_mutex;	/* synchronizes watch updates */
-	struct list_head 	events;		/* list of queued events */
-	atomic_t		count;		/* reference count */
-	struct user_struct	*user;		/* user who opened this dev */
-	struct inotify_handle	*ih;		/* inotify handle */
-	unsigned int		queue_size;	/* size of the queue (bytes) */
-	unsigned int		event_count;	/* number of pending events */
-	unsigned int		max_events;	/* maximum number of events */
-};
-
-/*
- * struct inotify_kernel_event - An inotify event, originating from a watch and
- * queued for user-space.  A list of these is attached to each instance of the
- * device.  In read(), this list is walked and all events that can fit in the
- * buffer are returned.
- *
- * Protected by dev->ev_mutex of the device in which we are queued.
- */
-struct inotify_kernel_event {
-	struct inotify_event	event;	/* the user-space event */
-	struct list_head        list;	/* entry in inotify_device's list */
-	char			*name;	/* filename, if any */
-};
-
-/*
- * struct inotify_user_watch - our version of an inotify_watch, we add
- * a reference to the associated inotify_device.
- */
-struct inotify_user_watch {
-	struct inotify_device	*dev;	/* associated device */
-	struct inotify_watch	wdata;	/* inotify watch data */
-};
 
 #ifdef CONFIG_SYSCTL
 
@@ -361,8 +322,8 @@ static int find_inode(const char __user 
  *
  * Callers must hold dev->up_mutex.
  */
-static int create_watch(struct inotify_device *dev, struct inode *inode,
-			u32 mask)
+int inotify_create_watch(struct inotify_device *dev, struct dentry *d,
+			 struct vfsmount *mnt, u32 mask)
 {
 	struct inotify_user_watch *watch;
 	int ret;
@@ -382,12 +343,13 @@ static int create_watch(struct inotify_d
 	atomic_inc(&dev->user->inotify_watches);
 
 	inotify_init_watch(&watch->wdata);
-	ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
+	ret = inotify_add_watch_dget(dev->ih, &watch->wdata, d, mnt, mask);
 	if (ret < 0)
 		free_inotify_user_watch(&watch->wdata);
 
 	return ret;
 }
+EXPORT_SYMBOL(inotify_create_watch);
 
 /* Device Interface */
 
@@ -519,13 +481,14 @@ static long inotify_ioctl(struct file *f
 	return ret;
 }
 
-static const struct file_operations inotify_fops = {
+const struct file_operations inotify_fops = {
 	.poll           = inotify_poll,
 	.read           = inotify_read,
 	.release        = inotify_release,
 	.unlocked_ioctl = inotify_ioctl,
 	.compat_ioctl	= inotify_ioctl,
 };
+EXPORT_SYMBOL(inotify_fops);
 
 static const struct inotify_operations inotify_user_ops = {
 	.handle_event	= inotify_dev_queue_event,
@@ -602,6 +565,7 @@ out_put_fd:
 	put_unused_fd(fd);
 	return ret;
 }
+EXPORT_SYMBOL(sys_inotify_init);
 
 asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 {
@@ -638,7 +602,7 @@ asmlinkage long sys_inotify_add_watch(in
 	mutex_lock(&dev->up_mutex);
 	ret = inotify_find_update_watch(dev->ih, inode, mask);
 	if (ret == -ENOENT)
-		ret = create_watch(dev, inode, mask);
+		ret = inotify_create_watch(dev, nd.dentry, nd.mnt, mask);
 	mutex_unlock(&dev->up_mutex);
 
 	path_release(&nd);
@@ -677,7 +641,7 @@ static struct super_block *
 inotify_get_sb(struct file_system_type *fs_type, int flags,
 	       const char *dev_name, void *data)
 {
-    return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA);
+    return get_sb_pseudo(fs_type, "inotify", NULL, 0x2BAD1DEA);
 }
 
 static struct file_system_type inotify_fs_type = {
diff -upr linux-2.6.16.46-0.12.orig/fs/ioprio.c linux-2.6.16.46-0.12-027test011/fs/ioprio.c
--- linux-2.6.16.46-0.12.orig/fs/ioprio.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/ioprio.c	2007-08-28 17:35:31.000000000 +0400
@@ -53,6 +53,9 @@ asmlinkage long sys_ioprio_set(int which
 	struct user_struct *user;
 	int ret;
 
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	switch (class) {
 		case IOPRIO_CLASS_RT:
 			if (!capable(CAP_SYS_ADMIN))
@@ -78,18 +81,18 @@ asmlinkage long sys_ioprio_set(int which
 			if (!who)
 				p = current;
 			else
-				p = find_task_by_pid(who);
+				p = find_task_by_pid_all(who);
 			if (p)
 				ret = set_task_ioprio(p, ioprio);
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			do_each_task_pid_all(who, PIDTYPE_PGID, p) {
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
 					break;
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_all(who, PIDTYPE_PGID, p);
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
@@ -100,13 +103,13 @@ asmlinkage long sys_ioprio_set(int which
 			if (!user)
 				break;
 
-			do_each_thread(g, p) {
+			do_each_thread_all(g, p) {
 				if (p->uid != who)
 					continue;
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
 					break;
-			} while_each_thread(g, p);
+			} while_each_thread_all(g, p);
 
 			if (who)
 				free_uid(user);
@@ -131,19 +134,19 @@ asmlinkage long sys_ioprio_get(int which
 			if (!who)
 				p = current;
 			else
-				p = find_task_by_pid(who);
+				p = find_task_by_pid_ve(who);
 			if (p)
 				ret = p->ioprio;
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
 				if (ret == -ESRCH)
 					ret = p->ioprio;
 				else
 					ret = ioprio_best(ret, p->ioprio);
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_ve(who, PIDTYPE_PGID, p);
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
@@ -154,14 +157,14 @@ asmlinkage long sys_ioprio_get(int which
 			if (!user)
 				break;
 
-			do_each_thread(g, p) {
+			do_each_thread_ve(g, p) {
 				if (p->uid != user->uid)
 					continue;
 				if (ret == -ESRCH)
 					ret = p->ioprio;
 				else
 					ret = ioprio_best(ret, p->ioprio);
-			} while_each_thread(g, p);
+			} while_each_thread_ve(g, p);
 
 			if (who)
 				free_uid(user);
diff -upr linux-2.6.16.46-0.12.orig/fs/lockd/clntproc.c linux-2.6.16.46-0.12-027test011/fs/lockd/clntproc.c
--- linux-2.6.16.46-0.12.orig/fs/lockd/clntproc.c	2007-08-24 19:28:30.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/lockd/clntproc.c	2007-08-28 17:35:33.000000000 +0400
@@ -130,11 +130,11 @@ static void nlmclnt_setlockargs(struct n
 	nlmclnt_next_cookie(&argp->cookie);
 	argp->state   = nsm_local_state;
 	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
-	lock->caller  = system_utsname.nodename;
+	lock->caller  = ve_utsname.nodename;
 	lock->oh.data = req->a_owner;
 	lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
 				(unsigned int)fl->fl_u.nfs_fl.owner->pid,
-				system_utsname.nodename);
+				ve_utsname.nodename);
 	lock->svid = fl->fl_u.nfs_fl.owner->pid;
 	locks_copy_lock(&lock->fl, fl);
 }
@@ -156,7 +156,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca
 {
 	locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
 	memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
-	call->a_args.lock.caller = system_utsname.nodename;
+	call->a_args.lock.caller = ve_utsname.nodename;
 	call->a_args.lock.oh.len = lock->oh.len;
 
 	/* set default data area */
@@ -202,6 +202,7 @@ nlmclnt_proc(struct inode *inode, int cm
 	sigset_t		oldset;
 	unsigned long		flags;
 	int			status, proto, vers;
+	struct ve_struct	*ve;
 
 	vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
 	if (NFS_PROTO(inode)->version > 3) {
@@ -211,11 +212,14 @@ nlmclnt_proc(struct inode *inode, int cm
 
 	/* Retrieve transport protocol from NFS client */
 	proto = NFS_CLIENT(inode)->cl_xprt->prot;
+	ve = set_exec_env(NFS_CLIENT(inode)->cl_xprt->owner_env);
 
 	host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers,
 				nfssrv->hostname, strlen(nfssrv->hostname));
-	if (host == NULL)
-		return -ENOLCK;
+	if (host == NULL) {
+		status = -ENOLCK;
+		goto out;
+	}
 
 	/* Create RPC client handle if not there, and copy soft
 	 * and intr flags from NFS client. */
@@ -284,6 +288,8 @@ nlmclnt_proc(struct inode *inode, int cm
 done:
 	dprintk("lockd: clnt proc returns %d\n", status);
 	nlm_release_host(host);
+out:
+	(void)set_exec_env(ve);
 	return status;
 }
 EXPORT_SYMBOL(nlmclnt_proc);
diff -upr linux-2.6.16.46-0.12.orig/fs/lockd/host.c linux-2.6.16.46-0.12-027test011/fs/lockd/host.c
--- linux-2.6.16.46-0.12.orig/fs/lockd/host.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/lockd/host.c	2007-08-28 17:35:33.000000000 +0400
@@ -91,6 +91,7 @@ nlm_lookup_host(int server, const struct
 	struct nlm_host	*host;
 	struct nsm_handle *nsm = NULL;
 	int		hash;
+	struct ve_struct *ve;
 
 	dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
 			NIPQUAD(sin->sin_addr.s_addr), proto, version,
@@ -115,9 +116,12 @@ nlm_lookup_host(int server, const struct
 	 * This would allow us to have one nlm_host per address.
 	 */
 	chain = &nlm_hosts[hash];
+	ve = get_exec_env();
 	hlist_for_each_entry(host, pos, chain, h_hash) {
 		if (!nlm_cmp_addr(&host->h_addr, sin))
 			continue;
+		if (!ve_accessible_strict(host->owner_env, ve))
+			continue;
 
 		/* See if we have an NSM handle for this client */
 		if (!nsm)
@@ -171,6 +175,7 @@ nlm_lookup_host(int server, const struct
 	hlist_add_head(&host->h_hash, chain);
 	INIT_LIST_HEAD(&host->h_lockowners);
 	spin_lock_init(&host->h_lock);
+	host->owner_env    = ve;
 
 	if (++nrhosts > nlm_max_hosts)
 		next_gc = 0;
diff -upr linux-2.6.16.46-0.12.orig/fs/lockd/mon.c linux-2.6.16.46-0.12-027test011/fs/lockd/mon.c
--- linux-2.6.16.46-0.12.orig/fs/lockd/mon.c	2007-08-24 19:28:11.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/lockd/mon.c	2007-08-28 17:35:31.000000000 +0400
@@ -181,7 +181,7 @@ xdr_encode_common(struct rpc_rqst *rqstp
 		name = buffer;
 	}
 	if (!(p = xdr_encode_string(p, name))
-	 || !(p = xdr_encode_string(p, system_utsname.nodename)))
+		|| !(p = xdr_encode_string(p, ve_utsname.nodename)))
 		return ERR_PTR(-EIO);
 	*p++ = htonl(argp->prog);
 	*p++ = htonl(argp->vers);
diff -upr linux-2.6.16.46-0.12.orig/fs/lockd/svc.c linux-2.6.16.46-0.12-027test011/fs/lockd/svc.c
--- linux-2.6.16.46-0.12.orig/fs/lockd/svc.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/lockd/svc.c	2007-08-28 17:35:33.000000000 +0400
@@ -46,10 +46,11 @@ struct nlmsvc_binding *		nlmsvc_ops;
 EXPORT_SYMBOL(nlmsvc_ops);
 
 static DECLARE_MUTEX(nlmsvc_sema);
-static unsigned int		nlmsvc_users;
-static pid_t			nlmsvc_pid;
-int				nlmsvc_grace_period;
-unsigned long			nlmsvc_timeout;
+static unsigned int		_nlmsvc_users;
+static pid_t			_nlmsvc_pid;
+int				_nlmsvc_grace_period;
+unsigned long			_nlmsvc_timeout;
+
 
 static DECLARE_MUTEX_LOCKED(lockd_start);
 static DECLARE_WAIT_QUEUE_HEAD(lockd_exit);
@@ -181,8 +182,13 @@ lockd(struct svc_rqst *rqstp)
 		 * recvfrom routine.
 		 */
 		err = svc_recv(serv, rqstp, timeout);
-		if (err == -EAGAIN || err == -EINTR)
+		if (err == -EAGAIN || err == -EINTR) {
+#ifdef CONFIG_VE
+			if (!get_exec_env()->is_running)
+				break;
+#endif
 			continue;
+		}
 		if (err < 0) {
 			printk(KERN_WARNING
 			       "lockd: terminating on error %d\n",
diff -upr linux-2.6.16.46-0.12.orig/fs/lockd/svcsubs.c linux-2.6.16.46-0.12-027test011/fs/lockd/svcsubs.c
--- linux-2.6.16.46-0.12.orig/fs/lockd/svcsubs.c	2007-08-24 19:28:11.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/lockd/svcsubs.c	2007-08-28 17:35:33.000000000 +0400
@@ -322,7 +322,8 @@ nlmsvc_same_host(struct nlm_host *host, 
 static int
 nlmsvc_is_client(struct nlm_host *host, struct nlm_host *dummy)
 {
-	return host->h_server;
+	return ve_accessible_strict(host->owner_env, get_exec_env()) &&
+		host->h_server;
 }
 
 /*
diff -upr linux-2.6.16.46-0.12.orig/fs/locks.c linux-2.6.16.46-0.12-027test011/fs/locks.c
--- linux-2.6.16.46-0.12.orig/fs/locks.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/locks.c	2007-08-28 17:35:34.000000000 +0400
@@ -129,6 +129,8 @@
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
 
+#include <ub/ub_misc.h>
+
 #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
 #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
 #define IS_LEASE(fl)	(fl->fl_flags & FL_LEASE)
@@ -148,11 +150,28 @@ static LIST_HEAD(blocked_list);
 static kmem_cache_t *filelock_cache;
 
 /* Allocate an empty lock structure. */
-static struct file_lock *locks_alloc_lock(void)
+static struct file_lock *locks_alloc_lock(int charge)
 {
-	return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+	struct file_lock *fl;
+
+	fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+#ifdef CONFIG_USER_RESOURCE
+	if (fl == NULL)
+		goto out;
+	fl->fl_charged = 0;
+	if (!charge)
+		goto out;
+	if (!ub_flock_charge(fl, 1))
+		goto out;
+
+	kmem_cache_free(filelock_cache, fl);
+	fl = NULL;
+out:
+#endif
+	return fl;
 }
 
+
 /* Free a lock which is not in use. */
 static void locks_free_lock(struct file_lock *fl)
 {
@@ -181,6 +200,7 @@ static void locks_free_lock(struct file_
 		fl->fl_lmops = NULL;
 	}
 
+	ub_flock_uncharge(fl);
 	kmem_cache_free(filelock_cache, fl);
 }
 
@@ -263,7 +283,7 @@ static int flock_make_lock(struct file *
 	if (type < 0)
 		return type;
 	
-	fl = locks_alloc_lock();
+	fl = locks_alloc_lock(type != F_UNLCK);
 	if (fl == NULL)
 		return -ENOMEM;
 
@@ -450,7 +470,7 @@ static int lease_init(struct file *filp,
 /* Allocate a file_lock initialised to this type of lease */
 static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
 {
-	struct file_lock *fl = locks_alloc_lock();
+	struct file_lock *fl = locks_alloc_lock(1);
 	int error = -ENOMEM;
 
 	if (fl == NULL)
@@ -741,9 +761,18 @@ static int flock_lock_file(struct file *
 	if (request->fl_type == F_UNLCK)
 		goto out;
 
-	new_fl = locks_alloc_lock();
-	if (new_fl == NULL)
+	/*
+	 * A non-F_UNLCK request must already have been charged in
+	 * flock_make_lock().
+	 *
+	 * Actually new_fl should be charged rather than the request,
+	 * but charging the request lets us fail earlier.
+	 */
+	new_fl = locks_alloc_lock(0);
+	if (new_fl == NULL) {
+		error = -ENOMEM;
 		goto out;
+	}	
 	/*
 	 * If a higher-priority process was blocked on the old file lock,
 	 * give it the opportunity to lock the file.
@@ -764,6 +793,10 @@ static int flock_lock_file(struct file *
 			locks_insert_block(fl, request);
 		goto out;
 	}
+
+	set_flock_charged(new_fl);
+	unset_flock_charged(request);
+
 	locks_copy_lock(new_fl, request);
 	locks_insert_lock(&inode->i_flock, new_fl);
 	new_fl = NULL;
@@ -790,8 +823,11 @@ static int __posix_lock_file(struct inod
 	 * We may need two file_lock structures for this operation,
 	 * so we get them in advance to avoid races.
 	 */
-	new_fl = locks_alloc_lock();
-	new_fl2 = locks_alloc_lock();
+	if (request->fl_type != F_UNLCK)
+		new_fl = locks_alloc_lock(1);
+	else
+		new_fl = NULL;
+	new_fl2 = locks_alloc_lock(0);
 
 	lock_kernel();
 	if (request->fl_type != F_UNLCK) {
@@ -819,7 +855,7 @@ static int __posix_lock_file(struct inod
 		goto out;
 
 	error = -ENOLCK; /* "no luck" */
-	if (!(new_fl && new_fl2))
+	if (!((request->fl_type == F_UNLCK || new_fl) && new_fl2))
 		goto out;
 
 	/*
@@ -925,19 +961,30 @@ static int __posix_lock_file(struct inod
 	if (!added) {
 		if (request->fl_type == F_UNLCK)
 			goto out;
+		error = -ENOLCK;
+		if (right && (left == right) && ub_flock_charge(new_fl, 1))
+			goto out;
 		locks_copy_lock(new_fl, request);
 		locks_insert_lock(before, new_fl);
 		new_fl = NULL;
+		error = 0;
 	}
 	if (right) {
 		if (left == right) {
 			/* The new lock breaks the old one in two pieces,
 			 * so we have to use the second new lock.
 			 */
+			error = -ENOLCK;
+			if (added && ub_flock_charge(new_fl2,
+						request->fl_type != F_UNLCK))
+				goto out;
+			/* FIXME: move all fl_charged manipulations into ub code */
+			set_flock_charged(new_fl2);
 			left = new_fl2;
 			new_fl2 = NULL;
 			locks_copy_lock(left, right);
 			locks_insert_lock(before, left);
+			error = 0;
 		}
 		right->fl_start = request->fl_end + 1;
 		locks_wake_up_blocks(right);
@@ -1357,7 +1404,7 @@ static int __setlease(struct file *filp,
 		goto out;
 
 	error = -ENOMEM;
-	fl = locks_alloc_lock();
+	fl = locks_alloc_lock(1);
 	if (fl == NULL)
 		goto out;
 
@@ -1545,6 +1592,7 @@ asmlinkage long sys_flock(unsigned int f
  out:
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_flock);
 
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK command of fcntl().
@@ -1580,7 +1628,7 @@ int fcntl_getlk(struct file *filp, struc
  
 	flock.l_type = F_UNLCK;
 	if (fl != NULL) {
-		flock.l_pid = fl->fl_pid;
+		flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
 #if BITS_PER_LONG == 32
 		/*
 		 * Make sure we can represent the posix lock via
@@ -1612,7 +1660,7 @@ out:
 int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		struct flock __user *l)
 {
-	struct file_lock *file_lock = locks_alloc_lock();
+	struct file_lock *file_lock = locks_alloc_lock(0);
 	struct flock flock;
 	struct inode *inode;
 	int error;
@@ -1734,7 +1782,7 @@ int fcntl_getlk64(struct file *filp, str
  
 	flock.l_type = F_UNLCK;
 	if (fl != NULL) {
-		flock.l_pid = fl->fl_pid;
+		flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
 		flock.l_start = fl->fl_start;
 		flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
 			fl->fl_end - fl->fl_start + 1;
@@ -1755,7 +1803,7 @@ out:
 int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		struct flock64 __user *l)
 {
-	struct file_lock *file_lock = locks_alloc_lock();
+	struct file_lock *file_lock = locks_alloc_lock(0);
 	struct flock64 flock;
 	struct inode *inode;
 	int error;
@@ -1983,7 +2031,9 @@ EXPORT_SYMBOL(posix_unblock_lock);
 static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
 {
 	struct inode *inode = NULL;
+	unsigned int fl_pid;
 
+	fl_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
 	if (fl->fl_file != NULL)
 		inode = fl->fl_file->f_dentry->d_inode;
 
@@ -2025,16 +2075,16 @@ static void lock_get_status(char* out, s
 	}
 	if (inode) {
 #ifdef WE_CAN_BREAK_LSLK_NOW
-		out += sprintf(out, "%d %s:%ld ", fl->fl_pid,
+		out += sprintf(out, "%d %s:%ld ", fl_pid,
 				inode->i_sb->s_id, inode->i_ino);
 #else
 		/* userspace relies on this representation of dev_t ;-( */
-		out += sprintf(out, "%d %02x:%02x:%ld ", fl->fl_pid,
+		out += sprintf(out, "%d %02x:%02x:%ld ", fl_pid,
 				MAJOR(inode->i_sb->s_dev),
 				MINOR(inode->i_sb->s_dev), inode->i_ino);
 #endif
 	} else {
-		out += sprintf(out, "%d <none>:0 ", fl->fl_pid);
+		out += sprintf(out, "%d <none>:0 ", fl_pid);
 	}
 	if (IS_POSIX(fl)) {
 		if (fl->fl_end == OFFSET_MAX)
@@ -2083,11 +2133,18 @@ int get_locks_status(char *buffer, char 
 	char *q = buffer;
 	off_t pos = 0;
 	int i = 0;
+	struct ve_struct *env;
 
 	lock_kernel();
+	env = get_exec_env();
 	list_for_each(tmp, &file_lock_list) {
 		struct list_head *btmp;
-		struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
+		struct file_lock *fl;
+		
+		fl = list_entry(tmp, struct file_lock, fl_link);
+		if (!ve_accessible(fl->fl_file->owner_env, env))
+			continue;
+
 		lock_get_status(q, fl, ++i, "");
 		move_lock_status(&q, &pos, offset);
 
@@ -2250,7 +2307,7 @@ EXPORT_SYMBOL(steal_locks);
 static int __init filelock_init(void)
 {
 	filelock_cache = kmem_cache_create("file_lock_cache",
-			sizeof(struct file_lock), 0, SLAB_PANIC,
+			sizeof(struct file_lock), 0, SLAB_PANIC | SLAB_UBC,
 			init_once, NULL);
 	return 0;
 }
diff -upr linux-2.6.16.46-0.12.orig/fs/namei.c linux-2.6.16.46-0.12-027test011/fs/namei.c
--- linux-2.6.16.46-0.12.orig/fs/namei.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/namei.c	2007-08-28 17:35:33.000000000 +0400
@@ -141,6 +141,7 @@ char * getname(const char __user * filen
 {
 	char *tmp, *result;
 
+	ub_dentry_checkup();
 	result = ERR_PTR(-ENOMEM);
 	tmp = __getname();
 	if (tmp)  {
@@ -704,7 +705,14 @@ static __always_inline void follow_dotdo
                         read_unlock(&current->fs->lock);
 			break;
 		}
-                read_unlock(&current->fs->lock);
+#ifdef CONFIG_VE
+		if (nd->dentry == get_exec_env()->fs_root &&
+		    nd->mnt == get_exec_env()->fs_rootmnt) {
+			read_unlock(&current->fs->lock);
+			break;
+		}
+#endif
+		read_unlock(&current->fs->lock);
 		spin_lock(&dcache_lock);
 		if (nd->dentry != nd->mnt->mnt_root) {
 			nd->dentry = dget(nd->dentry->d_parent);
@@ -745,6 +753,10 @@ static int do_lookup(struct nameidata *n
 	if (dentry->d_op && dentry->d_op->d_revalidate)
 		goto need_revalidate;
 done:
+	if ((nd->flags & LOOKUP_STRICT) && d_mountpoint(dentry)) {
+		dput(dentry);
+		return -ENOENT;
+	}
 	path->mnt = mnt;
 	path->dentry = dentry;
 	__follow_mount(path);
@@ -864,6 +876,9 @@ static fastcall int __link_path_walk(con
 			goto out_dput;
 
 		if (inode->i_op->follow_link) {
+			err = -ENOENT;
+			if (lookup_flags & LOOKUP_STRICT)
+				goto out_dput;
 			err = do_follow_link(&next, nd);
 			if (err)
 				goto return_err;
@@ -911,6 +926,7 @@ last_component:
 			break;
 		inode = next.dentry->d_inode;
 		if ((lookup_flags & LOOKUP_FOLLOW)
+		    && !(lookup_flags & LOOKUP_STRICT)
 		    && inode && inode->i_op && inode->i_op->follow_link) {
 			err = do_follow_link(&next, nd);
 			if (err)
@@ -951,6 +967,11 @@ return_reval:
 				break;
 		}
 return_base:
+		if (!(nd->flags & LOOKUP_NOAREACHECK)) {
+			err = check_area_access_ve(nd->dentry, nd->mnt);
+			if (err)
+				break;
+		}
 		return 0;
 out_dput:
 		dput_path(&next, nd);
@@ -1867,6 +1888,7 @@ asmlinkage long sys_mknod(const char __u
 {
 	return sys_mknodat(AT_FDCWD, filename, mode, dev);
 }
+EXPORT_SYMBOL_GPL(sys_mknod);
 
 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
@@ -1925,6 +1947,7 @@ asmlinkage long sys_mkdir(const char __u
 {
 	return sys_mkdirat(AT_FDCWD, pathname, mode);
 }
+EXPORT_SYMBOL_GPL(sys_mkdir);
 
 /*
  * We try to drop the dentry early: we should have
@@ -1953,6 +1976,7 @@ void dentry_unhash(struct dentry *dentry
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 }
+EXPORT_SYMBOL(sys_symlink);
 
 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
@@ -2032,6 +2056,7 @@ asmlinkage long sys_rmdir(const char __u
 {
 	return do_rmdir(AT_FDCWD, pathname);
 }
+EXPORT_SYMBOL_GPL(sys_rmdir);
 
 int vfs_unlink(struct inode *dir, struct dentry *dentry)
 {
@@ -2131,6 +2156,7 @@ asmlinkage long sys_unlink(const char __
 {
 	return do_unlinkat(AT_FDCWD, pathname);
 }
+EXPORT_SYMBOL_GPL(sys_unlink);
 
 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
 {
@@ -2285,6 +2311,7 @@ asmlinkage long sys_link(const char __us
 {
 	return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
 }
+EXPORT_SYMBOL(sys_rename);
 
 /*
  * The worst of all namespace operations - renaming directory. "Perverted"
@@ -2396,6 +2423,9 @@ int vfs_rename(struct inode *old_dir, st
 	int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
 	const char *old_name;
 
+	if (DQUOT_RENAME(old_dentry->d_inode, old_dir, new_dir))
+		return -EXDEV;
+
 	if (old_dentry->d_inode == new_dentry->d_inode)
  		return 0;
  
diff -upr linux-2.6.16.46-0.12.orig/fs/namespace.c linux-2.6.16.46-0.12-027test011/fs/namespace.c
--- linux-2.6.16.46-0.12.orig/fs/namespace.c	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/namespace.c	2007-08-28 17:35:36.000000000 +0400
@@ -40,6 +40,7 @@ static inline int sysfs_init(void)
 
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
+EXPORT_SYMBOL(vfsmount_lock);
 
 static int event;
 
@@ -47,7 +48,7 @@ static struct list_head *mount_hashtable
 static int hash_mask __read_mostly, hash_bits __read_mostly;
 static kmem_cache_t *mnt_cache;
 struct rw_semaphore namespace_sem;
-EXPORT_SYMBOL_GPL(namespace_sem);
+EXPORT_SYMBOL(namespace_sem);
 
 /* /sys/fs */
 decl_subsys(fs, NULL, NULL);
@@ -66,6 +67,7 @@ struct vfsmount *alloc_vfsmnt(const char
 	struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
 	if (mnt) {
 		memset(mnt, 0, sizeof(struct vfsmount));
+		mnt->owner = VEID(get_exec_env());
 		atomic_set(&mnt->mnt_count, 1);
 		INIT_LIST_HEAD(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
@@ -77,7 +79,7 @@ struct vfsmount *alloc_vfsmnt(const char
 		INIT_LIST_HEAD(&mnt->mnt_slave);
 		if (name) {
 			int size = strlen(name) + 1;
-			char *newname = kmalloc(size, GFP_KERNEL);
+			char *newname = kmalloc(size, GFP_KERNEL_UBC);
 			if (newname) {
 				memcpy(newname, name, size);
 				mnt->mnt_devname = newname;
@@ -372,10 +374,32 @@ static int show_vfsmnt(struct seq_file *
 		{ 0, NULL }
 	};
 	struct proc_fs_info *fs_infop;
+	char *path_buf, *path;
 
-	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+	/* skip MS_NOUSER mounts (rootfs) */
+	if (mnt->mnt_sb->s_flags & MS_NOUSER)
+		return 0;
+
+	path_buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!path_buf)
+		return -ENOMEM;
+	path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+	if (IS_ERR(path)) {
+		free_page((unsigned long) path_buf);
+		/*
+		 * This means that the file position will be incremented, i.e.
+		 * the total number of "invisible" vfsmnts is an information leak.
+		 */
+		return 0;
+	}
+
+	if (ve_is_super(get_exec_env()))
+		mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+	else
+		mangle(m, mnt->mnt_sb->s_type->name);
 	seq_putc(m, ' ');
-	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	mangle(m, path);
+	free_page((unsigned long) path_buf);
 	seq_putc(m, ' ');
 	mangle(m, mnt->mnt_sb->s_type->name);
 	seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
@@ -475,6 +499,7 @@ void release_mounts(struct list_head *he
 		mntput(mnt);
 	}
 }
+EXPORT_SYMBOL(release_mounts);
 
 void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
 {
@@ -499,6 +524,7 @@ void umount_tree(struct vfsmount *mnt, i
 		change_mnt_propagation(p, MS_PRIVATE);
 	}
 }
+EXPORT_SYMBOL(umount_tree);
 
 static int do_umount(struct vfsmount *mnt, int flags)
 {
@@ -586,6 +612,34 @@ static int do_umount(struct vfsmount *mn
 	return retval;
 }
 
+#ifdef CONFIG_VE
+void umount_ve_fs_type(struct file_system_type *local_fs_type)
+{
+	struct vfsmount *mnt;
+	struct list_head *p, *q;
+	LIST_HEAD(kill);
+	LIST_HEAD(umount_list);
+
+	down_write(&namespace_sem);
+	spin_lock(&vfsmount_lock);
+	list_for_each_safe(p, q, &current->namespace->list) {
+		mnt = list_entry(p, struct vfsmount, mnt_list);
+		if (mnt->mnt_sb->s_type != local_fs_type)
+			continue;
+		list_del(p);
+		list_add(p, &kill);
+	}
+
+	while (!list_empty(&kill)) {
+		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
+		umount_tree(mnt, 1, &umount_list);
+	}
+	spin_unlock(&vfsmount_lock);
+	up_write(&namespace_sem);
+	release_mounts(&umount_list);
+}
+#endif
+
 /*
  * Now umount can handle mount points as well as block devices.
  * This is important for filesystems which use unnamed block devices.
@@ -609,7 +663,7 @@ asmlinkage long sys_umount(char __user *
 		goto dput_and_out;
 
 	retval = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		goto dput_and_out;
 
 	retval = do_umount(nd.mnt, flags);
@@ -633,7 +687,7 @@ asmlinkage long sys_oldumount(char __use
 
 static int mount_is_safe(struct nameidata *nd)
 {
-	if (capable(CAP_SYS_ADMIN))
+	if (capable(CAP_VE_SYS_ADMIN))
 		return 0;
 	return -EPERM;
 #ifdef notyet
@@ -849,6 +903,8 @@ static int do_change_type(struct nameida
 
 	if (nd->dentry != nd->mnt->mnt_root)
 		return -EINVAL;
+	if (!ve_accessible_veid(nd->mnt->owner, get_exec_env()->veid))
+		return -EPERM;
 
 	down_write(&namespace_sem);
 	spin_lock(&vfsmount_lock);
@@ -862,7 +918,8 @@ static int do_change_type(struct nameida
 /*
  * do loopback mount.
  */
-static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
+static int do_loopback(struct nameidata *nd, char *old_name, int recurse,
+		int mnt_flags)
 {
 	struct nameidata old_nd;
 	struct vfsmount *mnt = NULL;
@@ -892,6 +949,7 @@ static int do_loopback(struct nameidata 
 	if (!mnt)
 		goto out;
 
+	mnt->mnt_flags |= mnt_flags;
 	err = graft_tree(mnt, nd);
 	if (err) {
 		LIST_HEAD(umount_list);
@@ -917,8 +975,9 @@ static int do_remount(struct nameidata *
 {
 	int err;
 	struct super_block *sb = nd->mnt->mnt_sb;
+	int bind;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 
 	if (!check_mnt(nd->mnt))
@@ -927,12 +986,23 @@ static int do_remount(struct nameidata *
 	if (nd->dentry != nd->mnt->mnt_root)
 		return -EINVAL;
 
+	/* do not allow remounting bind mounts with other mountpoint flags */
+	bind = 0;
+	if (nd->dentry != sb->s_root) {
+		if ((flags & ~(MS_BIND|MS_POSIXACL|MS_NOUSER)) != 0)
+			return -EINVAL;
+		bind = 1;
+	}
+
+	if (!ve_accessible_veid(nd->mnt->owner, get_exec_env()->veid))
+		return -EPERM;
+
 	down_write(&sb->s_umount);
-	err = do_remount_sb(sb, flags, data, 0);
+	err = bind ? 0 : do_remount_sb(sb, flags, data, 0);
 	if (!err)
 		nd->mnt->mnt_flags = mnt_flags;
 	up_write(&sb->s_umount);
-	if (!err)
+	if (!err && !bind)
 		security_sb_post_remount(nd->mnt, flags, data);
 	return err;
 }
@@ -952,7 +1022,7 @@ static int do_move_mount(struct nameidat
 	struct nameidata old_nd, parent_nd;
 	struct vfsmount *p;
 	int err = 0;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	if (!old_name || !*old_name)
 		return -EINVAL;
@@ -960,6 +1030,10 @@ static int do_move_mount(struct nameidat
 	if (err)
 		return err;
 
+	err = -EPERM;
+	if (!ve_accessible_veid(old_nd.mnt->owner, get_exec_env()->veid))
+		goto out_nosem;
+
 	down_write(&namespace_sem);
 	while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
 		;
@@ -1015,6 +1089,7 @@ out:
 	up_write(&namespace_sem);
 	if (!err)
 		path_release(&parent_nd);
+out_nosem:
 	path_release(&old_nd);
 	return err;
 }
@@ -1032,7 +1107,7 @@ static int do_new_mount(struct nameidata
 		return -EINVAL;
 
 	/* we need capabilities... */
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 
 	mnt = do_kern_mount(type, flags, name, data);
@@ -1070,6 +1145,11 @@ int do_add_mount(struct vfsmount *newmnt
 		goto unlock;
 
 	newmnt->mnt_flags = mnt_flags;
+
+	/* do this before graft_tree() exposes mnt_root to the world... */
+	if (nd->dentry->d_flags & DCACHE_VIRTUAL)
+		newmnt->mnt_root->d_flags |= DCACHE_VIRTUAL;
+
 	if ((err = graft_tree(newmnt, nd)))
 		goto unlock;
 
@@ -1313,7 +1393,7 @@ long do_mount(char *dev_name, char *dir_
 		retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
 				    data_page);
 	else if (flags & MS_BIND)
-		retval = do_loopback(&nd, dev_name, flags & MS_REC);
+		retval = do_loopback(&nd, dev_name, flags & MS_REC, mnt_flags);
 	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
 		retval = do_change_type(&nd, flags);
 	else if (flags & MS_MOVE)
@@ -1470,6 +1550,7 @@ out1:
 	free_page(type_page);
 	return retval;
 }
+EXPORT_SYMBOL_GPL(sys_mount);
 
 /*
  * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
@@ -1521,7 +1602,7 @@ static void chroot_fs_refs(struct nameid
 	struct fs_struct *fs;
 
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_ve(g, p) {
 		task_lock(p);
 		fs = p->fs;
 		if (fs) {
@@ -1536,7 +1617,7 @@ static void chroot_fs_refs(struct nameid
 			put_fs_struct(fs);
 		} else
 			task_unlock(p);
-	} while_each_thread(g, p);
+	} while_each_thread_ve(g, p);
 	read_unlock(&tasklist_lock);
 }
 
@@ -1689,10 +1770,10 @@ static void __init init_mount_tree(void)
 
 	init_task.namespace = namespace;
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		get_namespace(namespace);
 		p->namespace = namespace;
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	read_unlock(&tasklist_lock);
 
 	set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
@@ -1708,7 +1789,8 @@ void __init mnt_init(unsigned long mempa
 	init_rwsem(&namespace_sem);
 
 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
-			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_UBC,
+			NULL, NULL);
 
 	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
 
@@ -1764,3 +1846,4 @@ void __put_namespace(struct namespace *n
 	release_mounts(&umount_list);
 	kfree(namespace);
 }
+EXPORT_SYMBOL_GPL(__put_namespace);
diff -upr linux-2.6.16.46-0.12.orig/fs/nfs/direct.c linux-2.6.16.46-0.12-027test011/fs/nfs/direct.c
--- linux-2.6.16.46-0.12.orig/fs/nfs/direct.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/nfs/direct.c	2007-08-28 17:35:36.000000000 +0400
@@ -188,7 +188,7 @@ static inline struct nfs_direct_req *nfs
 	return dreq;
 }
 
-static void nfs_direct_req_release(struct kref *kref)
+static void nfs_direct_req_free(struct kref *kref)
 {
 	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
 
@@ -197,6 +197,11 @@ static void nfs_direct_req_release(struc
 	kmem_cache_free(nfs_direct_cachep, dreq);
 }
 
+static void nfs_direct_req_release(struct nfs_direct_req *dreq)
+{
+	kref_put(&dreq->kref, nfs_direct_req_free); /* last put frees dreq */
+}
+
 /*
  * Collects and returns the final error value/byte-count.
  */
@@ -216,7 +221,6 @@ static ssize_t nfs_direct_wait(struct nf
 		result = dreq->count;
 
 out:
-	kref_put(&dreq->kref, nfs_direct_req_release);
 	return (ssize_t) result;
 }
 
@@ -241,7 +245,7 @@ static void nfs_direct_complete(struct n
 	}
 	complete_all(&dreq->completion);
 
-	kref_put(&dreq->kref, nfs_direct_req_release);
+	nfs_direct_req_release(dreq);
 }
 
 /*
@@ -271,7 +275,7 @@ static struct nfs_direct_req *nfs_direct
 				list_del(&data->pages);
 				nfs_readdata_free(data);
 			}
-			kref_put(&dreq->kref, nfs_direct_req_release);
+			nfs_direct_req_release(dreq);
 			return NULL;
 		}
 
@@ -411,6 +415,7 @@ static ssize_t nfs_direct_read(struct ki
 	nfs_direct_read_schedule(dreq);
 	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
+	nfs_direct_req_release(dreq);
 
 	return result;
 }
@@ -557,7 +562,7 @@ static struct nfs_direct_req *nfs_direct
 				list_del(&data->pages);
 				nfs_writedata_free(data);
 			}
-			kref_put(&dreq->kref, nfs_direct_req_release);
+			nfs_direct_req_release(dreq);
 			return NULL;
 		}
 
@@ -737,6 +742,7 @@ static ssize_t nfs_direct_write(struct k
 	nfs_direct_write_schedule(dreq, sync);
 	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
+	nfs_direct_req_release(dreq);
 
 	return result;
 }
diff -upr linux-2.6.16.46-0.12.orig/fs/nfs/file.c linux-2.6.16.46-0.12-027test011/fs/nfs/file.c
--- linux-2.6.16.46-0.12.orig/fs/nfs/file.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/nfs/file.c	2007-08-28 17:35:30.000000000 +0400
@@ -334,6 +334,11 @@ static int nfs_release_page(struct page 
 	return 0;
 }
 
+static int nfs_launder_page(struct page *page)
+{	/* .launder_page hook of nfs_file_aops: hands the page to nfs_wb_page() */
+	return nfs_wb_page(page->mapping->host, page);
+}
+
 struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
 	.readpages = nfs_readpages,
@@ -347,6 +352,7 @@ struct address_space_operations nfs_file
 #ifdef CONFIG_NFS_DIRECTIO
 	.direct_IO = nfs_direct_IO,
 #endif
+	.launder_page = nfs_launder_page,
 };
 
 /* 
diff -upr linux-2.6.16.46-0.12.orig/fs/nfs/inode.c linux-2.6.16.46-0.12-027test011/fs/nfs/inode.c
--- linux-2.6.16.46-0.12.orig/fs/nfs/inode.c	2007-08-24 19:28:30.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/nfs/inode.c	2007-08-28 17:35:33.000000000 +0400
@@ -36,6 +36,9 @@
 #include <linux/mount.h>
 #include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
+#include <linux/ve_proto.h>
+#include <linux/vzcalluser.h>
+#include <linux/ve_nfs.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -184,11 +187,15 @@ nfs_umount_begin(struct super_block *sb)
 	struct rpc_clnt	*rpc = NFS_SB(sb)->client;
 
 	/* -EIO all pending I/O */
-	if (!IS_ERR(rpc))
+	if (!IS_ERR(rpc)) {
+		rpc->cl_dead = 1;
 		rpc_killall_tasks(rpc);
+	}
 	rpc = NFS_SB(sb)->client_acl;
-	if (!IS_ERR(rpc))
+	if (!IS_ERR(rpc)) {
+		rpc->cl_dead = 1;
 		rpc_killall_tasks(rpc);
+	}
 }
 
 
@@ -1736,7 +1743,7 @@ static struct file_system_type nfs_fs_ty
 	.name		= "nfs",
 	.get_sb		= nfs_get_sb,
 	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
 #ifdef CONFIG_NFS_V4
@@ -1755,6 +1762,74 @@ static struct super_operations nfs4_sops
 	.show_options	= nfs_show_options,
 };
 
+#ifdef CONFIG_VE
+/* VE start hook.  If the VE has VE_FEATURE_NFS set, allocate its NFS context
+ * and register nfs_fs_type for it through register_ve_fs_type().
+ * Returns 0 (also when the feature is off), -ENOMEM or the register error.
+ */
+static int ve_nfs_start(void *data)
+{
+	struct ve_struct *env = data;
+	struct ve_nfs_context *nfs_ctx;
+	int rc;
+
+	if (!(env->features & VE_FEATURE_NFS))
+		return 0;
+	nfs_ctx = kzalloc(sizeof(*nfs_ctx), GFP_KERNEL);
+	if (nfs_ctx == NULL)
+		return -ENOMEM;
+	rc = register_ve_fs_type(env, &nfs_fs_type, &nfs_ctx->fstype, NULL);
+	if (rc < 0) {
+		kfree(nfs_ctx);
+		return rc;
+	}
+	env->nfs_context = nfs_ctx;
+	return 0;
+}
+
+/* VE stop hook: kill the RPC client of each of the VE's NFS superblocks. */
+static void ve_nfs_stop(void *data)
+{
+	struct ve_struct *ve = data;
+	struct super_block *sb;
+
+	if (ve->nfs_context == NULL)
+		return;
+
+	/* On a clean stop we get here only if NFS was mounted read-only, so
+	 * force-stopping the client is harmless.  With a read-write mount this
+	 * is a FORCE stop and the client has to be stopped anyway.  The lock
+	 * daemon is already dead; only the superblock clients remain. */
+	spin_lock(&sb_lock);
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		struct rpc_clnt *clnt;
+
+		if (sb->s_type != ve->nfs_context->fstype)
+			continue;
+		clnt = NFS_SB(sb)->client;
+		/* may still be an ERR_PTR, cf. the check in nfs_umount_begin() */
+		if (IS_ERR(clnt))
+			continue;
+		clnt->cl_dead = 1;
+		clnt->cl_xprt->ops->close(clnt->cl_xprt);
+		rpc_killall_tasks(clnt);
+	}
+	spin_unlock(&sb_lock);
+
+	unregister_ve_fs_type(ve->nfs_context->fstype, NULL);
+	kfree(ve->nfs_context->fstype);
+	kfree(ve->nfs_context);
+	ve->nfs_context = NULL;
+}
+
+static struct ve_hook nfs_hook = {
+	.init	  = ve_nfs_start,	/* sets up ve->nfs_context */
+	.fini	  = ve_nfs_stop,	/* tears down ve->nfs_context */
+	.owner	  = THIS_MODULE,
+	.priority = HOOK_PRIO_NET_POST,
+};
+#endif
+
 /*
  * Clean out any remaining NFSv4 state that might be left over due
  * to open() calls that passed nfs_atomic_lookup, but failed to call
@@ -2223,6 +2298,10 @@ static int __init init_nfs_fs(void)
 
 	nfs_sysctl_table = register_sysctl_table_path(nfs_sysctls, ctl_path);
 
+	err = rpciod_up();
+	if (err)
+		goto out5;
+
 	err = nfs_init_nfspagecache();
 	if (err)
 		goto out4;
@@ -2253,6 +2332,7 @@ static int __init init_nfs_fs(void)
 		goto out;
 	if ((err = register_nfs4fs()) != 0)
 		goto out;
+	ve_hook_register(VE_SS_CHAIN, &nfs_hook);
 	return 0;
 out:
 #ifdef CONFIG_PROC_FS
@@ -2270,6 +2350,8 @@ out2:
 out3:
 	nfs_destroy_nfspagecache();
 out4:
+	rpciod_down();
+out5:
 	if (nfs_sysctl_table)
 		unregister_sysctl_table(nfs_sysctl_table);
 	nfs_sysctl_table = NULL;
@@ -2279,6 +2361,9 @@ out4:
 
 static void __exit exit_nfs_fs(void)
 {
+	ve_hook_unregister(&nfs_hook);
+	rpciod_down();
+
 #ifdef CONFIG_NFS_DIRECTIO
 	nfs_destroy_directcache();
 #endif
diff -upr linux-2.6.16.46-0.12.orig/fs/nfs/nfsroot.c linux-2.6.16.46-0.12-027test011/fs/nfs/nfsroot.c
--- linux-2.6.16.46-0.12.orig/fs/nfs/nfsroot.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/nfs/nfsroot.c	2007-08-28 17:35:31.000000000 +0400
@@ -312,7 +312,7 @@ static int __init root_nfs_name(char *na
 	/* Override them by options set on kernel command-line */
 	root_nfs_parse(name, buf);
 
-	cp = system_utsname.nodename;
+	cp = ve_utsname.nodename;
 	if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
 		printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
 		return -1;
diff -upr linux-2.6.16.46-0.12.orig/fs/ntfs/super.c linux-2.6.16.46-0.12-027test011/fs/ntfs/super.c
--- linux-2.6.16.46-0.12.orig/fs/ntfs/super.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/ntfs/super.c	2007-08-28 17:35:29.000000000 +0400
@@ -3033,7 +3033,7 @@ iput_tmp_ino_err_out_now:
 	 * method again... FIXME: Do we need to do this twice now because of
 	 * attribute inodes? I think not, so leave as is for now... (AIA)
 	 */
-	if (invalidate_inodes(sb)) {
+	if (invalidate_inodes(sb, 0)) {
 		ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
 				"driver bug.");
 		/* Copied from fs/super.c. I just love this message. (-; */
diff -upr linux-2.6.16.46-0.12.orig/fs/open.c linux-2.6.16.46-0.12-027test011/fs/open.c
--- linux-2.6.16.46-0.12.orig/fs/open.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/open.c	2007-08-28 17:35:33.000000000 +0400
@@ -25,6 +25,7 @@
 #include <linux/fs.h>
 #include <linux/personality.h>
 #include <linux/pagemap.h>
+#include <linux/faudit.h>
 #include <linux/syscalls.h>
 #include <linux/rcupdate.h>
 #include <linux/audit.h>
@@ -52,7 +53,21 @@ int vfs_statfs(struct super_block *sb, s
 
 EXPORT_SYMBOL(vfs_statfs);
 
-static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
+/* Pass @sb and the statfs result @buf to the VITYPE_FAUDIT notifier chain.
+ * Returns 0 if the call returned NOTIFY_DONE, otherwise arg.err. */
+int faudit_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	/* zero-initialised so arg.err is defined even if a handler never set it */
+	struct faudit_statfs_arg arg = { .sb = sb, .stat = buf };
+
+	if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS, &arg)
+			!= NOTIFY_DONE)
+		return arg.err;
+	return 0;
+}
+
+static int vfs_statfs_native(struct super_block *sb, struct vfsmount *mnt,
+		struct statfs *buf)
 {
 	struct kstatfs st;
 	int retval;
@@ -61,6 +76,10 @@ static int vfs_statfs_native(struct supe
 	if (retval)
 		return retval;
 
+	retval = faudit_statfs(mnt->mnt_sb, &st);
+	if (retval)
+		return retval;
+
 	if (sizeof(*buf) == sizeof(st))
 		memcpy(buf, &st, sizeof(st));
 	else {
@@ -95,7 +114,8 @@ static int vfs_statfs_native(struct supe
 	return 0;
 }
 
-static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf)
+static int vfs_statfs64(struct super_block *sb, struct vfsmount *mnt,
+		struct statfs64 *buf)
 {
 	struct kstatfs st;
 	int retval;
@@ -104,6 +124,10 @@ static int vfs_statfs64(struct super_blo
 	if (retval)
 		return retval;
 
+	retval = faudit_statfs(mnt->mnt_sb, &st);
+	if (retval)
+		return retval;
+
 	if (sizeof(*buf) == sizeof(st))
 		memcpy(buf, &st, sizeof(st));
 	else {
@@ -130,7 +154,8 @@ asmlinkage long sys_statfs(const char __
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs tmp;
-		error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs_native(nd.dentry->d_inode->i_sb,
+				nd.mnt, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -149,7 +174,8 @@ asmlinkage long sys_statfs64(const char 
 	error = user_path_walk(path, &nd);
 	if (!error) {
 		struct statfs64 tmp;
-		error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
+		error = vfs_statfs64(nd.dentry->d_inode->i_sb,
+				nd.mnt, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_release(&nd);
@@ -168,7 +194,8 @@ asmlinkage long sys_fstatfs(unsigned int
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb,
+			file->f_vfsmnt, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -189,7 +216,8 @@ asmlinkage long sys_fstatfs64(unsigned i
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
+	error = vfs_statfs64(file->f_dentry->d_inode->i_sb,
+			file->f_vfsmnt, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -368,52 +396,16 @@ asmlinkage long sys_ftruncate64(unsigned
  */
 asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
 {
-	int error;
-	struct nameidata nd;
-	struct inode * inode;
-	struct iattr newattrs;
-
-	error = user_path_walk(filename, &nd);
-	if (error)
-		goto out;
-	inode = nd.dentry->d_inode;
-
-	error = -EROFS;
-	if (IS_RDONLY(inode))
-		goto dput_and_out;
+	struct timeval tv[2];
 
-	/* Don't worry, the checks are done in inode_change_ok() */
-	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
 	if (times) {
-		error = -EPERM;
-		if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-			goto dput_and_out;
-
-		error = get_user(newattrs.ia_atime.tv_sec, &times->actime);
-		newattrs.ia_atime.tv_nsec = 0;
-		if (!error)
-			error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime);
-		newattrs.ia_mtime.tv_nsec = 0;
-		if (error)
-			goto dput_and_out;
-
-		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
-	} else {
-                error = -EACCES;
-                if (IS_IMMUTABLE(inode))
-                        goto dput_and_out;
-
-		if (current->fsuid != inode->i_uid &&
-		    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
-			goto dput_and_out;
+		if (get_user(tv[0].tv_sec, &times->actime) ||
+		    get_user(tv[1].tv_sec, &times->modtime))
+			return -EFAULT;
+		tv[0].tv_usec = 0;
+		tv[1].tv_usec = 0;
 	}
-	mutex_lock(&inode->i_mutex);
-	error = notify_change(nd.dentry, &newattrs);
-	mutex_unlock(&inode->i_mutex);
-dput_and_out:
-	path_release(&nd);
-out:
-	return error;
+	return do_utimes(AT_FDCWD, filename, times ? tv : NULL, 0);
 }
 
 #endif
@@ -422,14 +414,19 @@ out:
  * must be owner or have write permission.
  * Else, update from *times, must be owner or super user.
  */
-long do_utimes(int dfd, char __user *filename, struct timeval *times)
+long do_utimes(int dfd, char __user *filename, struct timeval *times, int flags)
 {
-	int error;
+	int error = -EINVAL;
 	struct nameidata nd;
 	struct inode * inode;
 	struct iattr newattrs;
+	int follow;
 
-	error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
+	if ((flags & ~AT_SYMLINK_NOFOLLOW) != 0)
+		goto out;
+
+	follow = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+	error = __user_walk_fd(dfd, filename, follow, &nd);
 
 	if (error)
 		goto out;
@@ -475,7 +472,7 @@ asmlinkage long sys_futimesat(int dfd, c
 
 	if (utimes && copy_from_user(&times, utimes, sizeof(times)))
 		return -EFAULT;
-	return do_utimes(dfd, filename, utimes ? times : NULL);
+	return do_utimes(dfd, filename, utimes ? times : NULL, 0);
 }
 
 asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes)
@@ -655,15 +652,20 @@ out:
 	return err;
 }
 
-asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
-			     mode_t mode)
+static long do_fchmodat(int dfd, const char __user *filename, mode_t mode,
+			int flags)
 {
 	struct nameidata nd;
 	struct inode * inode;
-	int error;
+	int error = -EINVAL;
 	struct iattr newattrs;
+	int follow;
 
-	error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
+	if ((flags & ~AT_SYMLINK_NOFOLLOW) != 0)
+		goto out;
+
+	follow = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+	error = __user_walk_fd(dfd, filename, follow, &nd);
 	if (error)
 		goto out;
 	inode = nd.dentry->d_inode;
@@ -690,6 +692,12 @@ out:
 	return error;
 }
 
+asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
+			     mode_t mode)
+{
+	return do_fchmodat(dfd, filename, mode, 0); /* flags 0: follow symlinks */
+}
+
 asmlinkage long sys_chmod(const char __user *filename, mode_t mode)
 {
 	return sys_fchmodat(AT_FDCWD, filename, mode);
@@ -742,6 +750,7 @@ asmlinkage long sys_chown(const char __u
 	}
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_chown);
 
 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 			     gid_t group, int flag)
@@ -1229,3 +1238,23 @@ int nonseekable_open(struct inode *inode
 }
 
 EXPORT_SYMBOL(nonseekable_open);
+
+asmlinkage long sys_lchmod(char __user * filename, mode_t mode)
+{	/* chmod relative to the cwd that does not follow a trailing symlink */
+	return do_fchmodat(AT_FDCWD, filename, mode, AT_SYMLINK_NOFOLLOW);
+}
+
+/* Like sys_utime(), but does not follow a trailing symlink in @filename. */
+asmlinkage long sys_lutime(char __user * filename,
+		struct utimbuf __user * times)
+{
+	struct timeval tv[2];
+
+	if (!times)
+		return do_utimes(AT_FDCWD, filename, NULL, AT_SYMLINK_NOFOLLOW);
+	if (get_user(tv[0].tv_sec, &times->actime) ||
+	    get_user(tv[1].tv_sec, &times->modtime))
+		return -EFAULT;
+	tv[0].tv_usec = tv[1].tv_usec = 0;
+	return do_utimes(AT_FDCWD, filename, tv, AT_SYMLINK_NOFOLLOW);
+}
diff -upr linux-2.6.16.46-0.12.orig/fs/partitions/check.c linux-2.6.16.46-0.12-027test011/fs/partitions/check.c
--- linux-2.6.16.46-0.12.orig/fs/partitions/check.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/partitions/check.c	2007-08-28 17:35:31.000000000 +0400
@@ -128,6 +128,7 @@ char *disk_name(struct gendisk *hd, int 
 
 	return buf;
 }
+EXPORT_SYMBOL(disk_name);
 
 const char *bdevname(struct block_device *bdev, char *buf)
 {
diff -upr linux-2.6.16.46-0.12.orig/fs/pipe.c linux-2.6.16.46-0.12-027test011/fs/pipe.c
--- linux-2.6.16.46-0.12.orig/fs/pipe.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/pipe.c	2007-08-28 17:35:33.000000000 +0400
@@ -19,6 +19,8 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
+#include <ub/ub_mem.h>
+
 /*
  * We use a start+len construction, which provides full use of the 
  * allocated memory.
@@ -284,7 +286,7 @@ pipe_writev(struct file *filp, const str
 			int error;
 
 			if (!page) {
-				page = alloc_page(GFP_HIGHUSER);
+				page = alloc_page(GFP_HIGHUSER | __GFP_UBC);
 				if (unlikely(!page)) {
 					ret = ret ? : -ENOMEM;
 					break;
@@ -662,7 +664,7 @@ struct inode* pipe_new(struct inode* ino
 {
 	struct pipe_inode_info *info;
 
-	info = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
+	info = ub_kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 	if (!info)
 		goto fail_page;
 	memset(info, 0, sizeof(*info));
@@ -797,6 +799,7 @@ close_f1:
 no_files:
 	return error;	
 }
+EXPORT_SYMBOL_GPL(do_pipe);
 
 /*
  * pipefs should _never_ be mounted by userland - too much of security hassle,
diff -upr linux-2.6.16.46-0.12.orig/fs/proc/array.c linux-2.6.16.46-0.12-027test011/fs/proc/array.c
--- linux-2.6.16.46-0.12.orig/fs/proc/array.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/proc/array.c	2007-08-28 17:35:34.000000000 +0400
@@ -76,6 +76,9 @@
 #include <linux/cpuset.h>
 #include <linux/rcupdate.h>
 #include <linux/delayacct.h>
+#include <linux/fairsched.h>
+
+#include <ub/beancounter.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -162,8 +165,14 @@ static inline char * task_state(struct t
 	struct group_info *group_info;
 	int g;
 	struct fdtable *fdt = NULL;
+	pid_t pid, ppid, tgid, vpid;
+
+	pid = get_task_pid(p);
+	tgid = get_task_tgid(p);
 
 	read_lock(&tasklist_lock);
+	ppid = get_task_ppid(p);
+	vpid = (pid_alive(p) ? virt_pid(p) : 0);
 	buffer += sprintf(buffer,
 		"State:\t%s\n"
 		"SleepAVG:\t%lu%%\n"
@@ -171,13 +180,19 @@ static inline char * task_state(struct t
 		"Pid:\t%d\n"
 		"PPid:\t%d\n"
 		"TracerPid:\t%d\n"
+#ifdef CONFIG_FAIRSCHED
+		"FNid:\t%d\n"
+#endif
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p),
 		(p->sleep_avg/1024)*100/(1020000000/1024),
-	       	p->tgid,
-		p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
-		pid_alive(p) && p->ptrace ? p->parent->pid : 0,
+	       	tgid,
+		pid, ppid,
+		pid_alive(p) && p->ptrace ? get_task_pid(p->parent) : 0,
+#ifdef CONFIG_FAIRSCHED
+		task_fairsched_node_id(p),
+#endif
 		p->uid, p->euid, p->suid, p->fsuid,
 		p->gid, p->egid, p->sgid, p->fsgid);
 	read_unlock(&tasklist_lock);
@@ -200,6 +215,17 @@ static inline char * task_state(struct t
 	put_group_info(group_info);
 
 	buffer += sprintf(buffer, "\n");
+
+#ifdef CONFIG_VE
+	/* vpid was sampled under tasklist_lock above; 0 if p is already dead */
+	buffer += sprintf(buffer,
+			"envID:\t%d\n"
+			"VPid:\t%d\n"
+			"PNState:\t%u\n"
+			"StopState:\t%u\n",
+			VE_TASK_INFO(p)->owner_env->veid, vpid,
+			p->pn_state, p->stopped_state);
+#endif
 	return buffer;
 }
 
@@ -245,7 +271,7 @@ static void collect_sigign_sigcatch(stru
 
 static inline char * task_sig(struct task_struct *p, char *buffer)
 {
-	sigset_t pending, shpending, blocked, ignored, caught;
+	sigset_t pending, shpending, blocked, ignored, caught, saved;
 	int num_threads = 0;
 	unsigned long qsize = 0;
 	unsigned long qlim = 0;
@@ -255,6 +281,7 @@ static inline char * task_sig(struct tas
 	sigemptyset(&blocked);
 	sigemptyset(&ignored);
 	sigemptyset(&caught);
+	sigemptyset(&saved);
 
 	/* Gather all the data with the appropriate locks held */
 	read_lock(&tasklist_lock);
@@ -263,6 +290,7 @@ static inline char * task_sig(struct tas
 		pending = p->pending.signal;
 		shpending = p->signal->shared_pending.signal;
 		blocked = p->blocked;
+		saved = p->saved_sigmask;
 		collect_sigign_sigcatch(p, &ignored, &caught);
 		num_threads = atomic_read(&p->signal->count);
 		qsize = atomic_read(&p->user->sigpending);
@@ -280,6 +308,7 @@ static inline char * task_sig(struct tas
 	buffer = render_sigset_t("SigBlk:\t", &blocked, buffer);
 	buffer = render_sigset_t("SigIgn:\t", &ignored, buffer);
 	buffer = render_sigset_t("SigCgt:\t", &caught, buffer);
+	buffer = render_sigset_t("SigSvd:\t", &saved, buffer);
 
 	return buffer;
 }
@@ -294,10 +323,27 @@ static inline char *task_cap(struct task
 			    cap_t(p->cap_effective));
 }
 
+#ifdef CONFIG_USER_RESOURCE
+static inline void ub_dump_task_info(struct task_struct *tsk,
+		char *stsk, int ltsk, char *smm, int lmm)
+{
+	print_ub_uid(tsk->task_bc.task_ub, stsk, ltsk);
+	task_lock(tsk);		/* tsk->mm is only examined under task_lock */
+	if (!tsk->mm)
+		strncpy(smm, "N/A", lmm);	/* no mm to report on */
+	else
+		print_ub_uid(tsk->mm->mm_ub, smm, lmm);
+	task_unlock(tsk);
+}
+#endif
+
 int proc_pid_status(struct task_struct *task, char * buffer)
 {
 	char * orig = buffer;
 	struct mm_struct *mm = get_task_mm(task);
+#ifdef CONFIG_USER_RESOURCE
+	char tsk_ub_info[64], mm_ub_info[64];
+#endif
 
 	buffer = task_name(task, buffer);
 	buffer = task_state(task, buffer);
@@ -312,6 +358,14 @@ int proc_pid_status(struct task_struct *
 #if defined(CONFIG_S390)
 	buffer = task_show_regs(task, buffer);
 #endif
+#ifdef CONFIG_USER_RESOURCE
+	ub_dump_task_info(task,
+			tsk_ub_info, sizeof(tsk_ub_info),
+			mm_ub_info, sizeof(mm_ub_info));
+
+	buffer += sprintf(buffer, "TaskUB:\t%s\n", tsk_ub_info);
+	buffer += sprintf(buffer, "MMUB:\t%s\n", mm_ub_info);
+#endif
 	return buffer - orig;
 }
 
@@ -334,6 +388,10 @@ static int do_task_stat(struct task_stru
 	DEFINE_KTIME(it_real_value);
 	struct task_struct *t;
 	char tcomm[sizeof(task->comm)];
+#ifdef CONFIG_USER_RESOURCE
+	char ub_task_info[64];
+	char ub_mm_info[64];
+#endif
 
 	state = *get_task_state(task);
 	vsize = eip = esp = 0;
@@ -371,11 +429,12 @@ static int do_task_stat(struct task_stru
 	}
 	if (task->signal) {
 		if (task->signal->tty) {
-			tty_pgrp = task->signal->tty->pgrp;
+			tty_pgrp = pid_type_to_vpid(PIDTYPE_PGID,
+						    task->signal->tty->pgrp);
 			tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
 		}
-		pgid = process_group(task);
-		sid = task->signal->session;
+		pgid = get_task_pgid(task);
+		sid = get_task_sid(task);
 		cmin_flt = task->signal->cmin_flt;
 		cmaj_flt = task->signal->cmaj_flt;
 		cutime = task->signal->cutime;
@@ -389,7 +448,7 @@ static int do_task_stat(struct task_stru
 		}
 		it_real_value = task->signal->real_timer.expires;
 	}
-	ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
+	ppid = get_task_ppid(task);
 	read_unlock(&tasklist_lock);
 
 	if (!whole || num_threads<2)
@@ -406,17 +465,34 @@ static int do_task_stat(struct task_stru
 	priority = task_prio(task);
 	nice = task_nice(task);
 
+#ifndef CONFIG_VE
 	/* Temporary variable needed for gcc-2.96 */
 	/* convert timespec -> nsec*/
 	start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
 				+ task->start_time.tv_nsec;
 	/* convert nsec -> ticks */
 	start_time = nsec_to_clock_t(start_time);
+#else
+	start_time = ve_relative_clock(&task->start_time);
+#endif
+
+#ifdef CONFIG_USER_RESOURCE
+	ub_dump_task_info(task,
+			ub_task_info, sizeof(ub_task_info),
+			ub_mm_info, sizeof(ub_mm_info));
+#endif
 
 	res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n",
-		task->pid,
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu"
+#ifdef CONFIG_VE
+" 0 0 0 0 0 0 0 %d %u"
+#endif
+#ifdef CONFIG_USER_RESOURCE
+		" %s %s"
+#endif
+		"\n",
+		get_task_pid(task),
 		tcomm,
 		state,
 		ppid,
@@ -461,7 +537,16 @@ static int do_task_stat(struct task_stru
 		task_cpu(task),
 		task->rt_priority,
 		task->policy,
-		(unsigned long long)delayacct_blkio_ticks(task));
+		(unsigned long long)delayacct_blkio_ticks(task)
+#ifdef CONFIG_VE
+		, virt_pid(task),
+		VEID(VE_TASK_INFO(task)->owner_env)
+#endif
+#ifdef CONFIG_USER_RESOURCE
+		, ub_task_info,
+		ub_mm_info
+#endif
+		);
 	if(mm)
 		mmput(mm);
 	return res;
diff -upr linux-2.6.16.46-0.12.orig/fs/proc/base.c linux-2.6.16.46-0.12-027test011/fs/proc/base.c
--- linux-2.6.16.46-0.12.orig/fs/proc/base.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/proc/base.c	2007-08-28 17:35:34.000000000 +0400
@@ -295,6 +295,7 @@ static int proc_fd_link(struct inode *in
 	struct files_struct *files;
 	struct file *file;
 	int fd = proc_type(inode) - PROC_TID_FD_DIR;
+	int err = -ENOENT;
 
 	files = get_files_struct(task);
 	if (files) {
@@ -305,16 +306,18 @@ static int proc_fd_link(struct inode *in
 		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
-			*mnt = mntget(file->f_vfsmnt);
-			*dentry = dget(file->f_dentry);
-			spin_unlock(&files->file_lock);
-			put_files_struct(files);
-			return 0;
+			if (d_root_check(file->f_dentry, file->f_vfsmnt)) {
+				err = -EACCES;
+			} else {
+				*mnt = mntget(file->f_vfsmnt);
+				*dentry = dget(file->f_dentry);
+				err = 0;
+			}
 		}
 		spin_unlock(&files->file_lock);
 		put_files_struct(files);
 	}
-	return -ENOENT;
+	return err;
 }
 
 static struct fs_struct *get_fs_struct(struct task_struct *task)
@@ -334,10 +337,12 @@ static int proc_cwd_link(struct inode *i
 	int result = -ENOENT;
 	if (fs) {
 		read_lock(&fs->lock);
-		*mnt = mntget(fs->pwdmnt);
-		*dentry = dget(fs->pwd);
+		result = d_root_check(fs->pwd, fs->pwdmnt);
+		if (!result) {
+			*mnt = mntget(fs->pwdmnt);
+			*dentry = dget(fs->pwd);
+		}
 		read_unlock(&fs->lock);
-		result = 0;
 		put_fs_struct(fs);
 	}
 	return result;
@@ -527,7 +532,9 @@ static int proc_oom_score(struct task_st
 	struct timespec uptime;
 
 	do_posix_clock_monotonic_gettime(&uptime);
+	read_lock(&tasklist_lock);
 	points = badness(task, uptime.tv_sec);
+	read_unlock(&tasklist_lock);
 	return sprintf(buffer, "%lu\n", points);
 }
 
@@ -594,6 +601,17 @@ static int proc_permission(struct inode 
 	return proc_check_root(inode);
 }
 
+/* Like proc_permission(), but a task is always allowed into its own fd dir. */
+static int proc_fd_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	int rc = proc_permission(inode, mask, nd);
+
+	if (rc == 0)
+		return 0;
+	/* denied by the generic check: still let the owning task through */
+	return (proc_task(inode) == current) ? 0 : rc;
+}
+
 static int proc_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	int error;
@@ -959,6 +977,8 @@ static ssize_t oom_adjust_write(struct f
 	oom_adjust = simple_strtol(buffer, &end, 0);
 	if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE)
 		return -EINVAL;
+	if (oom_adjust == OOM_DISABLE && !ve_is_super(get_exec_env()))
+		return -EPERM;
 	if (*end == '\n')
 		end++;
 	task->oomkilladj = oom_adjust;
@@ -1384,6 +1404,10 @@ static struct inode *proc_pid_make_inode
 	struct inode * inode;
 	struct proc_inode *ei;
 
+	if (!ve_accessible(VE_TASK_INFO(task)->owner_env,
+				sb->s_type->owner_env))
+		return NULL;
+
 	/* We need a new inode */
 	
 	inode = new_inode(sb);
@@ -1490,17 +1514,32 @@ static void pid_base_iput(struct dentry 
 	spin_lock(&task->proc_lock);
 	if (task->proc_dentry == dentry)
 		task->proc_dentry = NULL;
+#ifdef CONFIG_VE
+	if (VE_TASK_INFO(task)->glob_proc_dentry == dentry)
+		VE_TASK_INFO(task)->glob_proc_dentry = NULL;
+#endif
 	spin_unlock(&task->proc_lock);
 	iput(inode);
 }
 
 static int pid_delete_dentry(struct dentry * dentry)
 {
+	struct task_struct *tsk;
+
+	tsk = proc_task(dentry->d_inode);
+#ifdef CONFIG_VE
+	/*
+	 * Don't hash dentries from VE0 that may hold VE's pids
+	 */
+	if (!ve_accessible_strict(dentry->d_sb->s_type->owner_env,
+			tsk->ve_task_info.owner_env))
+		return 1;
+#endif
 	/* Is the task we represent dead?
 	 * If so, then don't put the dentry on the lru list,
 	 * kill it immediately.
 	 */
-	return !pid_alive(proc_task(dentry->d_inode));
+	return !pid_alive(tsk);
 }
 
 static struct dentry_operations tid_fd_dentry_operations =
@@ -1619,7 +1658,7 @@ static struct file_operations proc_task_
  */
 static struct inode_operations proc_fd_inode_operations = {
 	.lookup		= proc_lookupfd,
-	.permission	= proc_permission,
+	.permission	= proc_fd_permission,
 	.setattr	= proc_setattr,
 };
 
@@ -1981,14 +2020,14 @@ static int proc_self_readlink(struct den
 			      int buflen)
 {
 	char tmp[30];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", get_task_tgid(current));
 	return vfs_readlink(dentry,buffer,buflen,tmp);
 }
 
 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	char tmp[30];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", get_task_tgid(current));
 	return ERR_PTR(vfs_follow_link(nd,tmp));
 }	
 
@@ -2014,11 +2053,8 @@ static struct inode_operations proc_self
  *   of PIDTYPE_PID.
  */
 
-struct dentry *proc_pid_unhash(struct task_struct *p)
+struct dentry *__proc_pid_unhash(struct task_struct *p, struct dentry *proc_dentry)
 {
-	struct dentry *proc_dentry;
-
-	proc_dentry = p->proc_dentry;
 	if (proc_dentry != NULL) {
 
 		spin_lock(&dcache_lock);
@@ -2036,6 +2072,14 @@ struct dentry *proc_pid_unhash(struct ta
 	return proc_dentry;
 }
 
+void proc_pid_unhash(struct task_struct *p, struct dentry *pd[2])
+{
+	pd[0] = __proc_pid_unhash(p, p->proc_dentry);
+#ifdef CONFIG_VE	/* pd[1] is written only when VE support is built in */
+	pd[1] = __proc_pid_unhash(p, VE_TASK_INFO(p)->glob_proc_dentry);
+#endif	/* CONFIG_VE */
+}
+
 /**
  * proc_pid_flush - recover memory used by stale /proc/@pid/x entries
  * @proc_dentry: directoy to prune.
@@ -2043,7 +2087,7 @@ struct dentry *proc_pid_unhash(struct ta
  * Shrink the /proc directory that was used by the just killed thread.
  */
 	
-void proc_pid_flush(struct dentry *proc_dentry)
+void __proc_pid_flush(struct dentry *proc_dentry)
 {
 	might_sleep();
 	if(proc_dentry != NULL) {
@@ -2052,12 +2096,21 @@ void proc_pid_flush(struct dentry *proc_
 	}
 }
 
+void proc_pid_flush(struct dentry *proc_dentry[2])
+{
+	__proc_pid_flush(proc_dentry[0]);
+#ifdef CONFIG_VE	/* proc_dentry[1] is read only when VE support is built in */
+	__proc_pid_flush(proc_dentry[1]);
+#endif	/* CONFIG_VE */
+}
+
 /* SMP-safe */
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
 	struct task_struct *task;
 	struct inode *inode;
 	struct proc_inode *ei;
+	struct dentry *pd[2];
 	unsigned tgid;
 	int died;
 
@@ -2081,7 +2134,19 @@ struct dentry *proc_pid_lookup(struct in
 		goto out;
 
 	read_lock(&tasklist_lock);
-	task = find_task_by_pid(tgid);
+	task = find_task_by_pid_ve(tgid);
+	/* In theory we are allowed to lookup both /proc/VIRT_PID and
+	 * /proc/GLOBAL_PID inside VE. However, current /proc implementation
+	 * cannot maintain two references to one task, so that we have
+	 * to prohibit /proc/GLOBAL_PID.
+	 */
+	if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tgid)) {
+		/* However, VE_ENTERed tasks are exception, they use global
+		 * pids.
+		 */
+		if (virt_pid(task) != tgid)
+			task = NULL;
+	}
 	if (task)
 		get_task_struct(task);
 	read_unlock(&tasklist_lock);
@@ -2110,16 +2175,23 @@ struct dentry *proc_pid_lookup(struct in
 	died = 0;
 	d_add(dentry, inode);
 	spin_lock(&task->proc_lock);
+#ifdef CONFIG_VE
+	if (ve_is_super(inode->i_sb->s_type->owner_env))
+		VE_TASK_INFO(task)->glob_proc_dentry = dentry;
+	else
+		task->proc_dentry = dentry;
+#else
 	task->proc_dentry = dentry;
+#endif
 	if (!pid_alive(task)) {
-		dentry = proc_pid_unhash(task);
+		proc_pid_unhash(task, pd);
 		died = 1;
 	}
 	spin_unlock(&task->proc_lock);
 
 	put_task_struct(task);
 	if (died) {
-		proc_pid_flush(dentry);
+		proc_pid_flush(pd);
 		goto out;
 	}
 	return NULL;
@@ -2140,7 +2212,12 @@ static struct dentry *proc_task_lookup(s
 		goto out;
 
 	read_lock(&tasklist_lock);
-	task = find_task_by_pid(tid);
+	task = find_task_by_pid_ve(tid);
+	/* See comment above in similar place. */
+	if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tid)) {
+		if (virt_pid(task) != tid)
+			task = NULL;
+	}
 	if (task)
 		get_task_struct(task);
 	read_unlock(&tasklist_lock);
@@ -2184,16 +2261,23 @@ out:
  * tasklist lock while doing this, and we must release it before
  * we actually do the filldir itself, so we use a temp buffer..
  */
-static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
+static int get_tgid_list(int index, unsigned long version, unsigned int *tgids,
+		struct ve_struct *ve)
 {
 	struct task_struct *p;
 	int nr_tgids = 0;
 
 	index--;
 	read_lock(&tasklist_lock);
+	if (list_empty(&ve->vetask_lh))
+		goto out;
 	p = NULL;
 	if (version) {
-		p = find_task_by_pid(version);
+		struct ve_struct *oldve;
+
+		oldve = set_exec_env(ve);
+		p = find_task_by_pid_ve(version);
+		(void)set_exec_env(oldve);
 		if (p && !thread_group_leader(p))
 			p = NULL;
 	}
@@ -2201,10 +2285,10 @@ static int get_tgid_list(int index, unsi
 	if (p)
 		index = 0;
 	else
-		p = next_task(&init_task);
+		p = __first_task_ve(ve);
 
-	for ( ; p != &init_task; p = next_task(p)) {
-		int tgid = p->pid;
+	for ( ; p != NULL; p = __next_task_ve(ve, p)) {
+		int tgid = get_task_pid_ve(p, ve);
 		if (!pid_alive(p))
 			continue;
 		if (--index >= 0)
@@ -2214,6 +2298,7 @@ static int get_tgid_list(int index, unsi
 		if (nr_tgids >= PROC_MAXPIDS)
 			break;
 	}
+out:
 	read_unlock(&tasklist_lock);
 	return nr_tgids;
 }
@@ -2237,7 +2322,7 @@ static int get_tid_list(int index, unsig
 	 * via next_thread().
 	 */
 	if (pid_alive(task)) do {
-		int tid = task->pid;
+		int tid = get_task_pid(task);
 
 		if (--index >= 0)
 			continue;
@@ -2274,7 +2359,8 @@ int proc_pid_readdir(struct file * filp,
 	next_tgid = filp->f_version;
 	filp->f_version = 0;
 	for (;;) {
-		nr_tgids = get_tgid_list(nr, next_tgid, tgid_array);
+		nr_tgids = get_tgid_list(nr, next_tgid, tgid_array,
+				filp->f_dentry->d_sb->s_type->owner_env);
 		if (!nr_tgids) {
 			/* no more entries ! */
 			break;
diff -upr linux-2.6.16.46-0.12.orig/fs/proc/generic.c linux-2.6.16.46-0.12-027test011/fs/proc/generic.c
--- linux-2.6.16.46-0.12.orig/fs/proc/generic.c	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/proc/generic.c	2007-08-28 17:35:36.000000000 +0400
@@ -10,6 +10,7 @@
 
 #include <linux/errno.h>
 #include <linux/time.h>
+#include <linux/fs.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/module.h>
@@ -30,7 +31,7 @@ static ssize_t proc_file_write(struct fi
 			       size_t count, loff_t *ppos);
 static loff_t proc_file_lseek(struct file *, loff_t, int);
 
-DEFINE_SPINLOCK(proc_subdir_lock);
+static DEFINE_RWLOCK(proc_tree_lock);
 
 int proc_match(int len, const char *name, struct proc_dir_entry *de)
 {
@@ -238,6 +239,10 @@ static int proc_notify_change(struct den
 	struct proc_dir_entry *de = PDE(inode);
 	int error;
 
+	if ((iattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) &&
+	    LPDE(inode) == GPDE(inode))
+		return -EPERM;
+
 	error = inode_change_ok(inode, iattr);
 	if (error)
 		goto out;
@@ -274,7 +279,7 @@ static struct inode_operations proc_file
  * returns the struct proc_dir_entry for "/proc/tty/driver", and
  * returns "serial" in residual.
  */
-static int xlate_proc_name(const char *name,
+static int __xlate_proc_name(struct proc_dir_entry *root, const char *name,
 			   struct proc_dir_entry **ret, const char **residual)
 {
 	const char     		*cp = name, *next;
@@ -282,8 +287,13 @@ static int xlate_proc_name(const char *n
 	int			len;
 	int 			rtn = 0;
 
-	spin_lock(&proc_subdir_lock);
-	de = &proc_root;
+	if (*ret) {
+		de_get(*ret);
+		return 0;
+	}
+
+	read_lock(&proc_tree_lock);
+	de = root;
 	while (1) {
 		next = strchr(cp, '/');
 		if (!next)
@@ -301,12 +311,29 @@ static int xlate_proc_name(const char *n
 		cp += len + 1;
 	}
 	*residual = cp;
-	*ret = de;
+	*ret = de_get(de);
 out:
-	spin_unlock(&proc_subdir_lock);
+	read_unlock(&proc_tree_lock);
 	return rtn;
 }
 
+#ifndef CONFIG_VE	/* no VE: the "local" lookup is plain xlate_proc_name() */
+#define xlate_proc_loc_name xlate_proc_name
+#else	/* CONFIG_VE: walk from the current environment's proc_root */
+static int xlate_proc_loc_name(const char *name,
+			   struct proc_dir_entry **ret, const char **residual)
+{
+	return __xlate_proc_name(get_exec_env()->proc_root,
+			name, ret, residual);
+}
+#endif	/* CONFIG_VE */
+
+static int xlate_proc_name(const char *name,
+		struct proc_dir_entry **ret, const char **residual)
+{	/* same walk as above, but always rooted at the global proc_root */
+	return __xlate_proc_name(&proc_root, name, ret, residual);
+}
+
 static DEFINE_IDR(proc_inum_idr);
 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
 
@@ -378,6 +405,20 @@ static struct dentry_operations proc_den
 	.d_delete	= proc_delete_dentry,
 };
 
+static struct proc_dir_entry *__proc_lookup(struct proc_dir_entry *dir,
+		struct dentry *d)
+{
+	struct proc_dir_entry *child = dir->subdir;
+
+	/* walk dir's children until one matches d's name in length and bytes */
+	while (child && (child->namelen != d->d_name.len ||
+			memcmp(d->d_name.name, child->name, child->namelen)))
+		child = child->next;
+
+	/* child is NULL here when nothing matched; it is passed on as is */
+	return de_get(child);
+}
+
 /*
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
@@ -385,38 +426,158 @@ static struct dentry_operations proc_den
 struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = NULL;
-	struct proc_dir_entry * de;
+	struct proc_dir_entry *lde, *gde;
 	int error = -ENOENT;
 
 	lock_kernel();
-	spin_lock(&proc_subdir_lock);
-	de = PDE(dir);
-	if (de) {
-		for (de = de->subdir; de ; de = de->next) {
-			if (de->namelen != dentry->d_name.len)
-				continue;
-			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
-				unsigned int ino = de->low_ino;
-
-				spin_unlock(&proc_subdir_lock);
-				error = -EINVAL;
-				inode = proc_get_inode(dir->i_sb, ino, de);
-				spin_lock(&proc_subdir_lock);
-				break;
-			}
-		}
-	}
-	spin_unlock(&proc_subdir_lock);
+	lde = LPDE(dir);
+
+	if (!lde)
+		goto out;
+
+	read_lock(&proc_tree_lock);
+	lde = __proc_lookup(lde, dentry);
+	if (lde && !try_module_get(lde->owner)) {
+		de_put(lde);
+		lde = NULL;
+	}
+#ifdef CONFIG_VE
+	gde = GPDE(dir);
+	if (gde)
+		gde = __proc_lookup(gde, dentry);
+	if (!lde && gde && !try_module_get(gde->owner)) {
+		de_put(gde);
+		gde = NULL;
+	}
+#else
+	gde = NULL;
+#endif
+	read_unlock(&proc_tree_lock);
+
+	/*
+	 * There are following possible cases after lookup:
+	 *
+	 * lde		gde
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * NULL		NULL		ENOENT
+	 * loc		NULL		found in local tree
+	 * loc		glob		found in both trees
+	 * NULL		glob		found in global tree
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 *
+	 * We initialized inode as follows after lookup:
+	 *
+	 * inode->lde	inode->gde
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * loc		NULL		in local tree
+	 * loc		glob		both trees
+	 * glob		glob		global tree
+	 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	 * i.e. inode->lde is always initialized
+	 */
+
+	if (lde == NULL && gde == NULL)
+		goto out;
+
+	if (lde != NULL)
+		inode = proc_get_inode(dir->i_sb, lde->low_ino, lde);
+	else
+		inode = proc_get_inode(dir->i_sb, gde->low_ino, gde);
+
+	/*
+	 * We can sleep in proc_get_inode(), but since we have i_sem
+	 * being taken, no one can setup GPDE/LPDE on this inode.
+	 */
+	if (!inode)
+		goto out_put;
+
+#ifdef CONFIG_VE
+	GPDE(inode) = de_get(gde);
+	if (gde)
+		__module_get(gde->owner);
+
+	/* if dentry is found in both trees and it is a directory
+	 * then inode's nlink count must be altered, because local
+	 * and global subtrees may differ.
+	 * on the other hand, they may intersect, so actual nlink
+	 * value is difficult to calculate - upper estimate is used
+	 * instead of it.
+	 * dentry found in global tree only must not be writable
+	 * in non-super ve.
+	 */
+	if (lde && gde && lde != gde && gde->nlink > 1)
+		inode->i_nlink += gde->nlink - 2;
+	if (lde == NULL && !ve_is_super(dir->i_sb->s_type->owner_env))
+		inode->i_mode &= ~S_IWUGO;
+#endif
 	unlock_kernel();
+	dentry->d_op = &proc_dentry_operations;
+	d_add(dentry, inode);
+	de_put(lde);
+	de_put(gde);
+	return NULL;
 
-	if (inode) {
-		dentry->d_op = &proc_dentry_operations;
-		d_add(dentry, inode);
-		return NULL;
-	}
+out_put:
+	if (lde)
+		module_put(lde->owner);
+	else
+		module_put(gde->owner);
+	de_put(lde);
+	de_put(gde);
+out:
+	unlock_kernel();
 	return ERR_PTR(error);
 }
 
+struct proc_dir_reader {	/* cursor kept by proc_readdir() around filldir() */
+	struct list_head list;		/* link in proc_dir_readers */
+	struct proc_dir_entry *next;	/* entry the reader continues with */
+};
+
+static LIST_HEAD(proc_dir_readers);	/* all currently registered readers */
+static DEFINE_SPINLOCK(proc_dir_readers_lock);	/* taken for list add/del */
+
+static inline void add_reader(struct proc_dir_reader *r,
+		struct proc_dir_entry *cur)
+{
+	r->next = cur->next;	/* remember the successor of cur */
+	spin_lock(&proc_dir_readers_lock);
+	list_add(&r->list, &proc_dir_readers);	/* now seen by notify_readers() */
+	spin_unlock(&proc_dir_readers_lock);
+}
+
+static inline struct proc_dir_entry *del_reader(struct proc_dir_reader *r)
+{
+	spin_lock(&proc_dir_readers_lock);
+	list_del(&r->list);	/* unregister the cursor */
+	spin_unlock(&proc_dir_readers_lock);
+	return r->next;	/* may have been advanced by notify_readers() */
+}
+
+static void notify_readers(struct proc_dir_entry *de)
+{
+	struct proc_dir_reader *r;
+
+	/* lockless since proc_tree_lock is taken for writing */
+	list_for_each_entry(r, &proc_dir_readers, list)
+		if (r->next == de)	/* this reader would continue at de */
+			r->next = de->next;	/* continue after it instead */
+}
+
+static inline int in_tree(struct proc_dir_entry *de, struct proc_dir_entry *dir)
+{
+	struct proc_dir_entry *sib;
+
+	/* report 1 if some child of dir carries de's name, 0 otherwise */
+	for (sib = dir->subdir; sib != NULL; sib = sib->next) {
+		/* compare the lengths first, then the bytes */
+		if (sib->namelen == de->namelen &&
+		    !memcmp(de->name, sib->name, sib->namelen))
+			return 1;
+	}
+	return 0;
+}
+
 /*
  * This returns non-zero if at EOF, so that the /proc
  * root directory can use this and check if it should
@@ -434,6 +595,7 @@ int proc_readdir(struct file * filp,
 	int i;
 	struct inode *inode = filp->f_dentry->d_inode;
 	int ret = 0;
+	struct proc_dir_reader this;
 
 	lock_kernel();
 
@@ -460,15 +622,12 @@ int proc_readdir(struct file * filp,
 			filp->f_pos++;
 			/* fall through */
 		default:
-			spin_lock(&proc_subdir_lock);
+			read_lock(&proc_tree_lock);
 			de = de->subdir;
 			i -= 2;
 			for (;;) {
-				if (!de) {
-					ret = 1;
-					spin_unlock(&proc_subdir_lock);
-					goto out;
-				}
+				if (!de)
+					goto chk_global;
 				if (!i)
 					break;
 				de = de->next;
@@ -477,15 +636,60 @@ int proc_readdir(struct file * filp,
 
 			do {
 				/* filldir passes info to user space */
-				spin_unlock(&proc_subdir_lock);
-				if (filldir(dirent, de->name, de->namelen, filp->f_pos,
-					    de->low_ino, de->mode >> 12) < 0)
+				de_get(de);
+				add_reader(&this, de);
+				read_unlock(&proc_tree_lock);
+				ret = filldir(dirent, de->name, de->namelen,
+						filp->f_pos, de->low_ino,
+						de->mode >> 12);
+				read_lock(&proc_tree_lock);
+				de_put(de);
+				de = del_reader(&this);
+				if (ret < 0) {
+					read_unlock(&proc_tree_lock);
+					ret = 0;
 					goto out;
-				spin_lock(&proc_subdir_lock);
+				}
 				filp->f_pos++;
-				de = de->next;
 			} while (de);
-			spin_unlock(&proc_subdir_lock);
+chk_global:
+#ifdef CONFIG_VE
+			de = GPDE(inode);
+			if (de == NULL)
+				goto done;
+
+			de = de->subdir;
+			while (de) {
+				if (in_tree(de, LPDE(inode))) {
+					de = de->next;
+					continue;
+				}
+
+				if (i > 0) {
+					i--;
+					de = de->next;
+					continue;
+				}
+
+				de_get(de);
+				add_reader(&this, de);
+				read_unlock(&proc_tree_lock);
+				ret = filldir(dirent, de->name, de->namelen,
+						filp->f_pos, de->low_ino,
+						de->mode >> 12);
+				read_lock(&proc_tree_lock);
+				de_put(de);
+				de = del_reader(&this);
+				if (ret < 0) {
+					read_unlock(&proc_tree_lock);
+					ret = 0;
+					goto out;
+				}
+				filp->f_pos++;
+			}
+done:
+#endif
+			read_unlock(&proc_tree_lock);
 	}
 	ret = 1;
 out:	unlock_kernel();
@@ -518,13 +722,19 @@ static int proc_register(struct proc_dir
 	i = get_inode_number();
 	if (i == 0)
 		return -EAGAIN;
-	dp->low_ino = i;
 
-	spin_lock(&proc_subdir_lock);
+	write_lock(&proc_tree_lock);
+	if (dir->deleted) {
+		write_unlock(&proc_tree_lock);
+		release_inode_number(i);
+		return -ENOENT;
+	}
+
+	dp->low_ino = i;
 	dp->next = dir->subdir;
-	dp->parent = dir;
+	dp->parent = de_get(dir);
 	dir->subdir = dp;
-	spin_unlock(&proc_subdir_lock);
+	write_unlock(&proc_tree_lock);
 
 	if (S_ISDIR(dp->mode)) {
 		if (dp->proc_iops == NULL) {
@@ -579,24 +789,26 @@ static struct proc_dir_entry *proc_creat
 					  mode_t mode,
 					  nlink_t nlink)
 {
-	struct proc_dir_entry *ent = NULL;
+	struct proc_dir_entry *ent;
 	const char *fn = name;
 	int len;
 
 	/* make sure name is valid */
-	if (!name || !strlen(name)) goto out;
+	if (!name || !strlen(name))
+		goto out;
 
-	if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0)
+	if (xlate_proc_loc_name(name, parent, &fn) != 0)
 		goto out;
 
 	/* At this point there must not be any '/' characters beyond *fn */
 	if (strchr(fn, '/'))
-		goto out;
+		goto out_put;
 
 	len = strlen(fn);
 
 	ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
-	if (!ent) goto out;
+	if (!ent)
+		goto out_put;
 
 	memset(ent, 0, sizeof(struct proc_dir_entry));
 	memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
@@ -604,8 +816,13 @@ static struct proc_dir_entry *proc_creat
 	ent->namelen = len;
 	ent->mode = mode;
 	ent->nlink = nlink;
- out:
+	atomic_set(&ent->count, 1);
 	return ent;
+
+out_put:
+	de_put(*parent);
+out:
+	return NULL;
 }
 
 struct proc_dir_entry *proc_symlink(const char *name,
@@ -629,6 +846,7 @@ struct proc_dir_entry *proc_symlink(cons
 			kfree(ent);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
@@ -647,6 +865,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
 			kfree(ent);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
@@ -685,9 +904,28 @@ struct proc_dir_entry *create_proc_entry
 			kfree(ent);
 			ent = NULL;
 		}
+		de_put(parent);
 	}
 	return ent;
 }
+EXPORT_SYMBOL(remove_proc_glob_entry);
+
+struct proc_dir_entry *create_proc_glob_entry(const char *name, mode_t mode,
+		struct proc_dir_entry *parent)
+{
+	struct proc_dir_entry *new_de;
+	const char *full_name = name;
+
+	/* resolve via xlate_proc_name(); on success a ref on parent is held */
+	if (xlate_proc_name(full_name, &parent, &name))
+		return NULL;
+
+	new_de = create_proc_entry(name, mode, parent);
+	de_put(parent);		/* drop the reference taken by the lookup */
+	return new_de;
+}
+
+EXPORT_SYMBOL(create_proc_glob_entry);
 
 void free_proc_entry(struct proc_dir_entry *de)
 {
@@ -703,43 +941,80 @@ void free_proc_entry(struct proc_dir_ent
 	kfree(de);
 }
 
+static void show_proc_subdirs(struct proc_dir_entry *de, int level)
+{
+	for (de = de->subdir; de != NULL; de = de->next) {
+		printk("%*s [%s]\n", level, " ", de->name);	/* pad to level */
+		show_proc_subdirs(de, level + 5);	/* recurse, 5 columns deeper */
+	}
+}
+
 /*
  * Remove a /proc entry and free it if it's not currently in use.
  * If it is in use, we set the 'deleted' flag.
  */
-void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+static void __remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 {
 	struct proc_dir_entry **p;
 	struct proc_dir_entry *de;
 	const char *fn = name;
 	int len;
 
-	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
-		goto out;
 	len = strlen(fn);
 
-	spin_lock(&proc_subdir_lock);
+	write_lock(&proc_tree_lock);	/* excludes lookups and readdir walks */
 	for (p = &parent->subdir; *p; p=&(*p)->next ) {
 		if (!proc_match(len, fn, *p))
 			continue;
 		de = *p;
+		notify_readers(de);	/* move readdir cursors parked on de */
 		*p = de->next;
 		de->next = NULL;
 		if (S_ISDIR(de->mode))
 			parent->nlink--;
 		proc_kill_inodes(de);
 		de->nlink = 0;
-		WARN_ON(de->subdir);
-		if (!atomic_read(&de->count))
-			free_proc_entry(de);
-		else {
-			de->deleted = 1;
-			printk("remove_proc_entry: %s/%s busy, count=%d\n",
-				parent->name, de->name, atomic_read(&de->count));
-		}
+		if (de->subdir) {	/* removed while children are still linked */
+			WARN_ON(1);
+			show_proc_subdirs(de, 0);
+		}
+		de->deleted = 1;	/* de_put() frees only entries marked deleted */
+		de_put(de);		/* pairs with atomic_set(count, 1) in proc_create() */
+		de_put(parent);		/* pairs with de_get(dir) in proc_register() */
 		break;
 	}
-	spin_unlock(&proc_subdir_lock);
-out:
-	return;
+	write_unlock(&proc_tree_lock);
+}
+
+void remove_proc_loc_entry(const char *name, struct proc_dir_entry *parent)
+{
+	const char *full_name = name;
+
+	/* resolve parent and leaf name via xlate_proc_loc_name(); bail on error */
+	if (xlate_proc_loc_name(full_name, &parent, &name))
+		return;
+
+	__remove_proc_entry(name, parent);
+	de_put(parent);		/* release the reference the lookup took */
+}
+
+void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent)
+{
+	const char *full_name = name;
+
+	/* resolve parent and leaf name via xlate_proc_name(); bail on error */
+	if (xlate_proc_name(full_name, &parent, &name))
+		return;
+
+	__remove_proc_entry(name, parent);
+	de_put(parent);		/* release the reference the lookup took */
+}
+
+void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+{
+	remove_proc_loc_entry(name, parent);	/* always try the local tree */
+#ifdef CONFIG_VE	/* when ve_is_super() holds, also try the global tree */
+	if (ve_is_super(get_exec_env()))
+		remove_proc_glob_entry(name, parent);
+#endif	/* CONFIG_VE */
 }
diff -upr linux-2.6.16.46-0.12.orig/fs/proc/inode.c linux-2.6.16.46-0.12-027test011/fs/proc/inode.c
--- linux-2.6.16.46-0.12.orig/fs/proc/inode.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/proc/inode.c	2007-08-28 17:35:36.000000000 +0400
@@ -21,34 +21,25 @@
 
 #include "internal.h"
 
-static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
-{
-	if (de)
-		atomic_inc(&de->count);
-	return de;
-}
-
 /*
  * Decrements the use count and checks for deferred deletion.
  */
-static void de_put(struct proc_dir_entry *de)
+void de_put(struct proc_dir_entry *de)
 {
 	if (de) {	
-		lock_kernel();		
 		if (!atomic_read(&de->count)) {
 			printk("de_put: entry %s already free!\n", de->name);
-			unlock_kernel();
 			return;
 		}
 
 		if (atomic_dec_and_test(&de->count)) {
-			if (de->deleted) {
-				printk("de_put: deferred delete of %s\n",
+			if (unlikely(!de->deleted)) {
+				printk("de_put: early delete of %s\n",
 					de->name);
-				free_proc_entry(de);
+				return;
 			}
+			free_proc_entry(de);
 		}		
-		unlock_kernel();
 	}
 }
 
@@ -68,12 +59,19 @@ static void proc_delete_inode(struct ino
 		put_task_struct(tsk);
 
 	/* Let go of any associated proc directory entry */
-	de = PROC_I(inode)->pde;
+	de = LPDE(inode);
 	if (de) {
 		if (de->owner)
 			module_put(de->owner);
 		de_put(de);
 	}
+#ifdef CONFIG_VE
+	de = GPDE(inode);
+	if (de) {
+		module_put(de->owner);
+		de_put(de);
+	}
+#endif
 	clear_inode(inode);
 }
 
@@ -100,6 +98,9 @@ static struct inode *proc_alloc_inode(st
 	ei->pde = NULL;
 	inode = &ei->vfs_inode;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+#ifdef CONFIG_VE
+	GPDE(inode) = NULL;
+#endif
 	return inode;
 }
 
@@ -155,14 +156,9 @@ struct inode *proc_get_inode(struct supe
 	 */
 	de_get(de);
 
-	WARN_ON(de && de->deleted);
-
-	if (de != NULL && !try_module_get(de->owner))
-		goto out_mod;
-
 	inode = iget(sb, ino);
 	if (!inode)
-		goto out_ino;
+		goto out_mod;
 
 	PROC_I(inode)->pde = de;
 	if (de) {
@@ -183,9 +179,6 @@ struct inode *proc_get_inode(struct supe
 
 	return inode;
 
-out_ino:
-	if (de != NULL)
-		module_put(de->owner);
 out_mod:
 	de_put(de);
 	return NULL;
@@ -201,7 +194,9 @@ int proc_fill_super(struct super_block *
 	s->s_magic = PROC_SUPER_MAGIC;
 	s->s_op = &proc_sops;
 	s->s_time_gran = 1;
-	
+
+	/* proc_root.owner == NULL, just a formal call */
+	__module_get(proc_root.owner);
 	root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
 	if (!root_inode)
 		goto out_no_root;
@@ -210,6 +205,12 @@ int proc_fill_super(struct super_block *
 	s->s_root = d_alloc_root(root_inode);
 	if (!s->s_root)
 		goto out_no_root;
+#ifdef CONFIG_VE
+	LPDE(root_inode) = de_get(get_exec_env()->proc_root);
+	GPDE(root_inode) = &proc_root;
+#else
+	LPDE(root_inode) = &proc_root;
+#endif
 	return 0;
 
 out_no_root:
diff -upr linux-2.6.16.46-0.12.orig/fs/proc/kmsg.c linux-2.6.16.46-0.12-027test011/fs/proc/kmsg.c
--- linux-2.6.16.46-0.12.orig/fs/proc/kmsg.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/proc/kmsg.c	2007-08-28 17:35:32.000000000 +0400
@@ -11,6 +11,8 @@
 #include <linux/kernel.h>
 #include <linux/poll.h>
 #include <linux/fs.h>
+#include <linux/veprintk.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -40,7 +42,7 @@ static ssize_t kmsg_read(struct file *fi
 
 static unsigned int kmsg_poll(struct file *file, poll_table *wait)
 {
-	poll_wait(file, &log_wait, wait);
+	poll_wait(file, &ve_log_wait, wait);
 	if (do_syslog(9, NULL, 0))
 		return POLLIN | POLLRDNORM;
 	return 0;
@@ -53,3 +55,4 @@ struct file_operations proc_kmsg_operati
 	.open		= kmsg_open,
 	.release	= kmsg_release,
 };
+EXPORT_SYMBOL(proc_kmsg_operations);
diff -upr linux-2.6.16.46-0.12.orig/fs/proc/proc_devtree.c linux-2.6.16.46-0.12-027test011/fs/proc/proc_devtree.c
--- linux-2.6.16.46-0.12.orig/fs/proc/proc_devtree.c	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/proc/proc_devtree.c	2007-08-28 17:35:29.000000000 +0400
@@ -117,8 +117,6 @@ static int duplicate_name(struct proc_di
 	struct proc_dir_entry *ent;
 	int found = 0;
 
-	spin_lock(&proc_subdir_lock);
-
 	for (ent = de->subdir; ent != NULL; ent = ent->next) {
 		if (strcmp(ent->name, name) == 0) {
 			found = 1;
@@ -126,8 +124,6 @@ static int duplicate_name(struct proc_di
 		}
 	}
 
-	spin_unlock(&proc_subdir_lock);
-
 	return found;
 }
 
diff -upr linux-2.6.16.46-0.12.orig/fs/proc/proc_misc.c linux-2.6.16.46-0.12-027test011/fs/proc/proc_misc.c
--- linux-2.6.16.46-0.12.orig/fs/proc/proc_misc.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/proc/proc_misc.c	2007-08-28 17:35:34.000000000 +0400
@@ -32,6 +32,7 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
+#include <linux/virtinfo.h>
 #include <linux/smp.h>
 #include <linux/signal.h>
 #include <linux/module.h>
@@ -45,6 +46,8 @@
 #include <linux/jiffies.h>
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/compile.h>
 #include <linux/crash_dump.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -53,8 +56,10 @@
 #include <asm/div64.h>
 #include "internal.h"
 
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+#ifdef CONFIG_FAIRSCHED
+#include <linux/fairsched.h>
+#endif
+
 /*
  * Warning: stuff below (imported functions) assumes that its output will fit
  * into one page. For some of those functions it may be wrong. Moreover, we
@@ -84,15 +89,33 @@ static int loadavg_read_proc(char *page,
 {
 	int a, b, c;
 	int len;
-
-	a = avenrun[0] + (FIXED_1/200);
-	b = avenrun[1] + (FIXED_1/200);
-	c = avenrun[2] + (FIXED_1/200);
+	unsigned long __nr_running;
+	int __nr_threads;
+	unsigned long *__avenrun;
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+
+	if (ve_is_super(ve)) {
+		__avenrun = &avenrun[0];
+		__nr_running = nr_running();
+		__nr_threads = nr_threads;
+	}
+#ifdef CONFIG_VE
+	else {
+		__avenrun = &ve->avenrun[0];
+		__nr_running = nr_running_ve(ve);
+		__nr_threads = atomic_read(&ve->pcounter);
+	}
+#endif
+	a = __avenrun[0] + (FIXED_1/200);
+	b = __avenrun[1] + (FIXED_1/200);
+	c = __avenrun[2] + (FIXED_1/200);
 	len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
 		LOAD_INT(a), LOAD_FRAC(a),
 		LOAD_INT(b), LOAD_FRAC(b),
 		LOAD_INT(c), LOAD_FRAC(c),
-		nr_running(), nr_threads, last_pid);
+		__nr_running, __nr_threads, last_pid);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
@@ -105,6 +128,13 @@ static int uptime_read_proc(char *page, 
 	cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
 
 	do_posix_clock_monotonic_gettime(&uptime);
+#ifdef CONFIG_VE
+	if (!ve_is_super(get_exec_env())) {
+		set_normalized_timespec(&uptime,
+		      uptime.tv_sec - get_exec_env()->start_timespec.tv_sec,
+		      uptime.tv_nsec - get_exec_env()->start_timespec.tv_nsec);
+	}
+#endif
 	cputime_to_timespec(idletime, &idle);
 	len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
 			(unsigned long) uptime.tv_sec,
@@ -118,35 +148,40 @@ static int uptime_read_proc(char *page, 
 static int meminfo_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
-	struct sysinfo i;
+	struct meminfo mi;
 	int len;
-	struct page_state ps;
-	unsigned long inactive;
-	unsigned long active;
-	unsigned long free;
-	unsigned long committed;
-	unsigned long allowed;
+	unsigned long dummy;
 	struct vmalloc_info vmi;
-	long cached;
 
-	get_page_state(&ps);
-	get_zone_counts(&active, &inactive, &free);
+	get_page_state(&mi.ps);
+	get_zone_counts(&mi.active, &mi.inactive, &dummy);
 
 /*
  * display in kilobytes.
  */
 #define K(x) ((x) << (PAGE_SHIFT - 10))
-	si_meminfo(&i);
-	si_swapinfo(&i);
-	committed = atomic_read(&vm_committed_space);
-	allowed = ((totalram_pages - hugetlb_total_pages())
-		* sysctl_overcommit_ratio / 100) + total_swap_pages;
+	si_meminfo(&mi.si);
+	si_swapinfo(&mi.si);
+	mi.committed_space = atomic_read(&vm_committed_space);
+	mi.swapcache = total_swapcache_pages;
+	mi.cache = get_page_cache_size() - mi.swapcache - mi.si.bufferram;
+	if (mi.cache < 0)
+		mi.cache = 0;
 
-	cached = get_page_cache_size() - total_swapcache_pages - i.bufferram;
-	if (cached < 0)
-		cached = 0;
+	mi.vmalloc_total = (VMALLOC_END - VMALLOC_START) >> PAGE_SHIFT;
+	mi.allowed = ((totalram_pages - hugetlb_total_pages())
+		* sysctl_overcommit_ratio / 100) + total_swap_pages;
 
 	get_vmalloc_info(&vmi);
+	mi.vmalloc_used = vmi.used >> PAGE_SHIFT;
+	mi.vmalloc_largest = vmi.largest_chunk >> PAGE_SHIFT;
+	mi.vmalloc_total = VMALLOC_TOTAL >> PAGE_SHIFT;
+
+#ifdef CONFIG_USER_RESOURCE
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi)
+			& NOTIFY_FAIL)
+		return -ENOMSG;
+#endif
 
 	/*
 	 * Tagged format, for easy grepping and expansion.
@@ -176,30 +211,30 @@ static int meminfo_read_proc(char *page,
 		"VmallocTotal: %8lu kB\n"
 		"VmallocUsed:  %8lu kB\n"
 		"VmallocChunk: %8lu kB\n",
-		K(i.totalram),
-		K(i.freeram),
-		K(i.bufferram),
-		K(cached),
-		K(total_swapcache_pages),
-		K(active),
-		K(inactive),
-		K(i.totalhigh),
-		K(i.freehigh),
-		K(i.totalram-i.totalhigh),
-		K(i.freeram-i.freehigh),
-		K(i.totalswap),
-		K(i.freeswap),
-		K(ps.nr_dirty),
-		K(ps.nr_writeback),
-		K(ps.nr_anon),
-		K(ps.nr_mapped),
-		K(ps.nr_slab),
-		K(allowed),
-		K(committed),
-		K(ps.nr_page_table_pages),
-		(unsigned long)VMALLOC_TOTAL >> 10,
-		vmi.used >> 10,
-		vmi.largest_chunk >> 10
+		K(mi.si.totalram),
+		K(mi.si.freeram),
+		K(mi.si.bufferram),
+		K(mi.cache),
+		K(mi.swapcache),
+		K(mi.active),
+		K(mi.inactive),
+		K(mi.si.totalhigh),
+		K(mi.si.freehigh),
+		K(mi.si.totalram-mi.si.totalhigh),
+		K(mi.si.freeram-mi.si.freehigh),
+		K(mi.si.totalswap),
+		K(mi.si.freeswap),
+		K(mi.ps.nr_dirty),
+		K(mi.ps.nr_writeback),
+		K(mi.ps.nr_anon),
+		K(mi.ps.nr_mapped),
+		K(mi.ps.nr_slab),
+		K(mi.allowed),
+		K(mi.committed_space),
+		K(mi.ps.nr_page_table_pages),
+		K(mi.vmalloc_total),
+		K(mi.vmalloc_used),
+		K(mi.vmalloc_largest)
 		);
 
 		len += hugetlb_report_meminfo(page + len);
@@ -339,8 +374,15 @@ static int version_read_proc(char *page,
 				 int count, int *eof, void *data)
 {
 	int len;
+	struct new_utsname *utsname = &ve_utsname;
 
-	strcpy(page, linux_banner);
+	if (ve_is_super(get_exec_env()))
+		strcpy(page, linux_banner);
+	else
+		sprintf(page, "Linux version %s ("
+		      LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") ("
+		      LINUX_COMPILER ") %s\n",
+		      utsname->release, utsname->version);
 	len = strlen(page);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
@@ -498,18 +540,15 @@ static struct file_operations proc_slabi
 };
 #endif
 
-static int show_stat(struct seq_file *p, void *v)
+static void show_stat_ve0(struct seq_file *p)
 {
 	int i;
-	unsigned long jif;
+	struct page_state page_state;
 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
 	u64 sum = 0;
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
-	jif = - wall_to_monotonic.tv_sec;
-	if (wall_to_monotonic.tv_nsec)
-		--jif;
 
 	for_each_cpu(i) {
 		int j;
@@ -563,9 +602,86 @@ static int show_stat(struct seq_file *p,
 	for (i = 0; i < NR_IRQS; i++)
 		seq_printf(p, " %u", kstat_irqs(i));
 #endif
+	get_full_page_state(&page_state);
+	seq_printf(p, "\nswap %lu %lu\n", page_state.pswpin, page_state.pswpout);
+}
+
+#ifdef CONFIG_VE
+static void show_stat_ve(struct seq_file *p, struct ve_struct *env)
+{
+	cpumask_t online;
+	cycles_t c_idle = 0, c_iowait = 0;
+	u64 t_user = 0, t_nice = 0, t_sys = 0;
+	int cpu;
+
+	ve_cpu_online_map(env, &online);
+
+	/* first pass: sum the counters over every cpu in the mask */
+	for_each_cpu_mask(cpu, online) {
+		t_user += VE_CPU_STATS(env, cpu)->user;
+		t_nice += VE_CPU_STATS(env, cpu)->nice;
+		t_sys += VE_CPU_STATS(env, cpu)->system;
+
+		c_idle += ve_sched_get_idle_time(cpu);
+		c_iowait += ve_sched_get_iowait_time(cpu);
+	}
+
+	seq_printf(p, "cpu  %llu %llu %llu %llu %llu 0 0 0\n",
+		(unsigned long long)cputime64_to_clock_t(t_user),
+		(unsigned long long)cputime64_to_clock_t(t_nice),
+		(unsigned long long)cputime64_to_clock_t(t_sys),
+		(unsigned long long)cycles_to_clocks(c_idle),
+		(unsigned long long)cycles_to_clocks(c_iowait));
+
+	for_each_cpu_mask(cpu, online) {	/* second pass: one line per cpu */
+		t_user = VE_CPU_STATS(env, cpu)->user;
+		t_nice = VE_CPU_STATS(env, cpu)->nice;
+		t_sys = VE_CPU_STATS(env, cpu)->system;
+
+		c_idle = ve_sched_get_idle_time(cpu);
+		c_iowait = ve_sched_get_iowait_time(cpu);
+		seq_printf(p, "cpu%d %llu %llu %llu %llu %llu 0 0 0\n",
+			cpu,
+			(unsigned long long)cputime64_to_clock_t(t_user),
+			(unsigned long long)cputime64_to_clock_t(t_nice),
+			(unsigned long long)cputime64_to_clock_t(t_sys),
+			(unsigned long long)cycles_to_clocks(c_idle),
+			(unsigned long long)cycles_to_clocks(c_iowait));
+	}
+	seq_printf(p, "intr 0\nswap 0 0\n");
+}
+#endif
+
+int show_stat(struct seq_file *p, void *v)
+{
+	extern unsigned long total_forks;
+	unsigned long seq, jif;
+	struct ve_struct *env;
+	unsigned long __nr_running, __nr_iowait;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		jif = - wall_to_monotonic.tv_sec;
+		if (wall_to_monotonic.tv_nsec)
+			--jif;
+	} while (read_seqretry(&xtime_lock, seq));
+
+	env = get_exec_env();
+	if (ve_is_super(env)) {
+		show_stat_ve0(p);
+		__nr_running = nr_running();
+		__nr_iowait = nr_iowait();
+	}
+#ifdef CONFIG_VE
+	else {
+		show_stat_ve(p, env);
+		__nr_running = nr_running_ve(env);
+		__nr_iowait = nr_iowait_ve();
+	}
+#endif
 
 	seq_printf(p,
-		"\nctxt %llu\n"
+		"ctxt %llu\n"
 		"btime %lu\n"
 		"processes %lu\n"
 		"procs_running %lu\n"
@@ -573,8 +689,8 @@ static int show_stat(struct seq_file *p,
 		nr_context_switches(),
 		(unsigned long)jif,
 		total_forks,
-		nr_running(),
-		nr_iowait());
+		__nr_running,
+		__nr_iowait);
 
 	return 0;
 }
@@ -663,7 +779,8 @@ static int cmdline_read_proc(char *page,
 {
 	int len;
 
-	len = sprintf(page, "%s\n", saved_command_line);
+	len = sprintf(page, "%s\n",
+		ve_is_super(get_exec_env()) ? saved_command_line : "quiet");
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
@@ -692,11 +809,15 @@ static ssize_t write_sysrq_trigger(struc
 				   size_t count, loff_t *ppos)
 {
 	if (count) {
-		char c;
+		int i, cnt;
+		char c[32];
 
-		if (get_user(c, buf))
+		cnt = min(count, sizeof(c));
+		if (copy_from_user(c, buf, cnt))
 			return -EFAULT;
-		__handle_sysrq(c, NULL, NULL, 0);
+
+		for (i = 0; i < cnt; i++)
+			__handle_sysrq(c[i], NULL, NULL, 0);
 	}
 	return count;
 }
diff -upr linux-2.6.16.46-0.12.orig/fs/proc/proc_tty.c linux-2.6.16.46-0.12-027test011/fs/proc/proc_tty.c
--- linux-2.6.16.46-0.12.orig/fs/proc/proc_tty.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/proc/proc_tty.c	2007-08-28 17:35:31.000000000 +0400
@@ -106,24 +106,35 @@ static int show_tty_driver(struct seq_fi
 /* iterator */
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-	struct list_head *p;
+	struct tty_driver *drv;
+
 	loff_t l = *pos;
-	list_for_each(p, &tty_drivers)
+	read_lock(&tty_driver_guard);
+	list_for_each_entry(drv, &tty_drivers, tty_drivers) {
+		if (!ve_accessible_strict(drv->owner_env, get_exec_env()))
+			continue;
 		if (!l--)
-			return list_entry(p, struct tty_driver, tty_drivers);
+			return drv;
+	}
 	return NULL;
 }
 
 static void *t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next;
+	struct tty_driver *drv;
+
 	(*pos)++;
-	return p==&tty_drivers ? NULL :
-			list_entry(p, struct tty_driver, tty_drivers);
+	drv = (struct tty_driver *)v;
+	list_for_each_entry_continue(drv, &tty_drivers, tty_drivers) {
+		if (ve_accessible_strict(drv->owner_env, get_exec_env()))
+			return drv;
+	}
+	return NULL;
 }
 
 static void t_stop(struct seq_file *m, void *v)
 {
+	read_unlock(&tty_driver_guard);
 }
 
 static struct seq_operations tty_drivers_op = {
diff -upr linux-2.6.16.46-0.12.orig/fs/proc/root.c linux-2.6.16.46-0.12-027test011/fs/proc/root.c
--- linux-2.6.16.46-0.12.orig/fs/proc/root.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/proc/root.c	2007-08-28 17:35:31.000000000 +0400
@@ -20,7 +20,10 @@
 
 #include "internal.h"
 
-struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
+#ifndef CONFIG_VE
+struct proc_dir_entry *proc_net, *proc_net_stat;
+#endif
+struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
 
 #ifdef CONFIG_SYSCTL
 struct proc_dir_entry *proc_sys_root;
@@ -32,12 +35,14 @@ static struct super_block *proc_get_sb(s
 	return get_sb_single(fs_type, flags, data, proc_fill_super);
 }
 
-static struct file_system_type proc_fs_type = {
+struct file_system_type proc_fs_type = {
 	.name		= "proc",
 	.get_sb		= proc_get_sb,
 	.kill_sb	= kill_anon_super,
 };
 
+EXPORT_SYMBOL(proc_fs_type);
+
 void __init proc_root_init(void)
 {
 	int err = proc_init_inodecache();
@@ -157,7 +162,9 @@ EXPORT_SYMBOL(create_proc_entry);
 EXPORT_SYMBOL(remove_proc_entry);
 EXPORT_SYMBOL(proc_root);
 EXPORT_SYMBOL(proc_root_fs);
+#ifndef CONFIG_VE
 EXPORT_SYMBOL(proc_net);
 EXPORT_SYMBOL(proc_net_stat);
+#endif
 EXPORT_SYMBOL(proc_bus);
 EXPORT_SYMBOL(proc_root_driver);
diff -upr linux-2.6.16.46-0.12.orig/fs/proc/task_mmu.c linux-2.6.16.46-0.12-027test011/fs/proc/task_mmu.c
--- linux-2.6.16.46-0.12.orig/fs/proc/task_mmu.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/proc/task_mmu.c	2007-08-28 17:35:31.000000000 +0400
@@ -90,9 +90,12 @@ int proc_exe_link(struct inode *inode, s
 	}
 
 	if (vma) {
-		*mnt = mntget(vma->vm_file->f_vfsmnt);
-		*dentry = dget(vma->vm_file->f_dentry);
-		result = 0;
+		result = d_root_check(vma->vm_file->f_dentry,
+				vma->vm_file->f_vfsmnt);
+		if (!result) {
+			*mnt = mntget(vma->vm_file->f_vfsmnt);
+			*dentry = dget(vma->vm_file->f_dentry);
+		}
 	}
 
 	up_read(&mm->mmap_sem);
diff -upr linux-2.6.16.46-0.12.orig/fs/proc/task_nommu.c linux-2.6.16.46-0.12-027test011/fs/proc/task_nommu.c
--- linux-2.6.16.46-0.12.orig/fs/proc/task_nommu.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/proc/task_nommu.c	2007-08-28 17:35:31.000000000 +0400
@@ -126,9 +126,12 @@ int proc_exe_link(struct inode *inode, s
 	}
 
 	if (vma) {
-		*mnt = mntget(vma->vm_file->f_vfsmnt);
-		*dentry = dget(vma->vm_file->f_dentry);
-		result = 0;
+		result = d_root_check(vma->vm_file->f_dentry,
+				vma->vm_file->f_vfsmnt);
+		if (!result) {
+			*mnt = mntget(vma->vm_file->f_vfsmnt);
+			*dentry = dget(vma->vm_file->f_dentry);
+		}
 	}
 
 	up_read(&mm->mmap_sem);
diff -upr linux-2.6.16.46-0.12.orig/fs/quota.c linux-2.6.16.46-0.12-027test011/fs/quota.c
--- linux-2.6.16.46-0.12.orig/fs/quota.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/quota.c	2007-08-28 17:35:33.000000000 +0400
@@ -81,11 +81,11 @@ static int generic_quotactl_valid(struct
 	if (cmd == Q_GETQUOTA) {
 		if (((type == USRQUOTA && current->euid != id) ||
 		     (type == GRPQUOTA && !in_egroup_p(id))) &&
-		    !capable(CAP_SYS_ADMIN))
+		    !capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 	}
 	else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO)
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 
 	return 0;
@@ -132,10 +132,10 @@ static int xqm_quotactl_valid(struct sup
 	if (cmd == Q_XGETQUOTA) {
 		if (((type == XQM_USRQUOTA && current->euid != id) ||
 		     (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
-		     !capable(CAP_SYS_ADMIN))
+		     !capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 	} else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable(CAP_VE_SYS_ADMIN))
 			return -EPERM;
 	}
 
@@ -216,7 +216,7 @@ restart:
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
-		if (sb->s_root && sb->s_qcop->quota_sync)
+		if (sb->s_root && sb->s_qcop && sb->s_qcop->quota_sync)
 			quota_sync_sb(sb, type);
 		up_read(&sb->s_umount);
 		spin_lock(&sb_lock);
@@ -358,7 +358,7 @@ asmlinkage long sys_quotactl(unsigned in
 		tmp = getname(special);
 		if (IS_ERR(tmp))
 			return PTR_ERR(tmp);
-		bdev = lookup_bdev(tmp);
+		bdev = lookup_bdev(tmp, FMODE_QUOTACTL);
 		putname(tmp);
 		if (IS_ERR(bdev))
 			return PTR_ERR(bdev);
diff -upr linux-2.6.16.46-0.12.orig/fs/read_write.c linux-2.6.16.46-0.12-027test011/fs/read_write.c
--- linux-2.6.16.46-0.12.orig/fs/read_write.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/read_write.c	2007-08-28 17:35:30.000000000 +0400
@@ -19,6 +19,8 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
+#include <ub/beancounter.h>
+
 struct file_operations generic_ro_fops = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
@@ -339,6 +341,29 @@ static inline void file_pos_write(struct
 	file->f_pos = pos;
 }
 
+/* Charge one write of @bytes; signed so that error returns are skipped. */
+static inline void bc_acct_write(ssize_t bytes)
+{
+	if (bytes > 0) {
+		struct user_beancounter *ub = get_exec_ub();
+
+		ub_percpu_inc(ub, write);
+		ub_percpu_add(ub, wchar, bytes);
+	}
+}
+
+/* Charge one read of @bytes; signed so that error returns are skipped. */
+static inline void bc_acct_read(ssize_t bytes)
+{
+	if (bytes > 0) {
+		struct user_beancounter *ub = get_exec_ub();
+
+		ub_percpu_inc(ub, read);
+		ub_percpu_add(ub, rchar, bytes);
+	}
+}
+
+
 asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
 {
 	struct file *file;
@@ -351,6 +376,8 @@ asmlinkage ssize_t sys_read(unsigned int
 		ret = vfs_read(file, buf, count, &pos);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_read(ret);
 	}
 
 	return ret;
@@ -369,6 +396,8 @@ asmlinkage ssize_t sys_write(unsigned in
 		ret = vfs_write(file, buf, count, &pos);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_write(ret);
 	}
 
 	return ret;
@@ -390,6 +419,8 @@ asmlinkage ssize_t sys_pread64(unsigned 
 		if (file->f_mode & FMODE_PREAD)
 			ret = vfs_read(file, buf, count, &pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_read(ret);
 	}
 
 	return ret;
@@ -411,6 +442,8 @@ asmlinkage ssize_t sys_pwrite64(unsigned
 		if (file->f_mode & FMODE_PWRITE)  
 			ret = vfs_write(file, buf, count, &pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_write(ret);
 	}
 
 	return ret;
@@ -607,6 +640,8 @@ sys_readv(unsigned long fd, const struct
 		ret = vfs_readv(file, vec, vlen, &pos);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_read(ret);
 	}
 
 	if (ret > 0)
@@ -628,6 +663,8 @@ sys_writev(unsigned long fd, const struc
 		ret = vfs_writev(file, vec, vlen, &pos);
 		file_pos_write(file, pos);
 		fput_light(file, fput_needed);
+
+		bc_acct_write(ret);
 	}
 
 	if (ret > 0)
diff -upr linux-2.6.16.46-0.12.orig/fs/reiserfs/namei.c linux-2.6.16.46-0.12-027test011/fs/reiserfs/namei.c
--- linux-2.6.16.46-0.12.orig/fs/reiserfs/namei.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/reiserfs/namei.c	2007-08-28 17:35:33.000000000 +0400
@@ -864,6 +864,9 @@ static int reiserfs_rmdir(struct inode *
 	INITIALIZE_PATH(path);
 	struct reiserfs_dir_entry de;
 
+	inode = dentry->d_inode;
+	DQUOT_INIT(inode);
+
 	/* we will be doing 2 balancings and update 2 stat data, we change quotas
 	 * of the owner of the directory and of the owner of the parent directory.
 	 * The quota structure is possibly deleted only on last iput => outside
@@ -888,8 +891,6 @@ static int reiserfs_rmdir(struct inode *
 		goto end_rmdir;
 	}
 
-	inode = dentry->d_inode;
-
 	reiserfs_update_inode_transaction(inode);
 	reiserfs_update_inode_transaction(dir);
 
@@ -952,6 +953,7 @@ static int reiserfs_unlink(struct inode 
 	unsigned long savelink;
 
 	inode = dentry->d_inode;
+	DQUOT_INIT(inode);
 
 	/* in this transaction we can be doing at max two balancings and update
 	 * two stat datas, we change quotas of the owner of the directory and of
@@ -1259,6 +1261,8 @@ static int reiserfs_rename(struct inode 
 
 	old_inode = old_dentry->d_inode;
 	new_dentry_inode = new_dentry->d_inode;
+	if (new_dentry_inode)
+		DQUOT_INIT(new_dentry_inode);
 
 	// make sure, that oldname still exists and points to an object we
 	// are going to rename
diff -upr linux-2.6.16.46-0.12.orig/fs/select.c linux-2.6.16.46-0.12-027test011/fs/select.c
--- linux-2.6.16.46-0.12.orig/fs/select.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/select.c	2007-08-28 17:35:30.000000000 +0400
@@ -24,6 +24,8 @@
 #include <linux/fs.h>
 #include <linux/rcupdate.h>
 
+#include <ub/ub_mem.h>
+
 #include <asm/uaccess.h>
 
 #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
@@ -286,7 +288,11 @@ int do_select(int n, fd_set_bits *fds, s
 
 static void *select_bits_alloc(int size)
 {
-	return kmalloc(6 * size, GFP_KERNEL);
+	int flags;
+	flags = GFP_KERNEL;
+	if (size > PAGE_SIZE / 6)
+		flags |= __GFP_UBC;
+	return kmalloc(6 * size, flags);
 }
 
 static void select_bits_free(void *bits, int size)
@@ -626,6 +632,7 @@ int do_sys_poll(struct pollfd __user *uf
  	unsigned int i;
 	struct poll_list *head;
  	struct poll_list *walk;
+	int flags;
 	struct fdtable *fdt;
 	int max_fdset;
 
@@ -643,12 +650,15 @@ int do_sys_poll(struct pollfd __user *uf
 	walk = NULL;
 	i = nfds;
 	err = -ENOMEM;
+	flags = GFP_KERNEL | __GFP_UBC;
 	while(i!=0) {
 		struct poll_list *pp;
+		if (i <= POLLFD_PER_PAGE)
+			flags &= ~__GFP_UBC;
 		pp = kmalloc(sizeof(struct poll_list)+
 				sizeof(struct pollfd)*
 				(i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
-					GFP_KERNEL);
+					flags);
 		if(pp==NULL)
 			goto out_fds;
 		pp->next=NULL;
diff -upr linux-2.6.16.46-0.12.orig/fs/seq_file.c linux-2.6.16.46-0.12-027test011/fs/seq_file.c
--- linux-2.6.16.46-0.12.orig/fs/seq_file.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/seq_file.c	2007-08-28 17:35:31.000000000 +0400
@@ -177,21 +177,23 @@ EXPORT_SYMBOL(seq_read);
 
 static int traverse(struct seq_file *m, loff_t offset)
 {
-	loff_t pos = 0;
+	loff_t pos = 0, index;
 	int error = 0;
 	void *p;
 
 	m->version = 0;
-	m->index = 0;
+	index = 0;
 	m->count = m->from = 0;
-	if (!offset)
+	if (!offset) {
+		m->index = index;
 		return 0;
+	}
 	if (!m->buf) {
 		m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
 		if (!m->buf)
 			return -ENOMEM;
 	}
-	p = m->op->start(m, &m->index);
+	p = m->op->start(m, &index);
 	while (p) {
 		error = PTR_ERR(p);
 		if (IS_ERR(p))
@@ -204,15 +206,17 @@ static int traverse(struct seq_file *m, 
 		if (pos + m->count > offset) {
 			m->from = offset - pos;
 			m->count -= m->from;
+			m->index = index;
 			break;
 		}
 		pos += m->count;
 		m->count = 0;
 		if (pos == offset) {
-			m->index++;
+			index++;
+			m->index = index;
 			break;
 		}
-		p = m->op->next(m, p, &m->index);
+		p = m->op->next(m, p, &index);
 	}
 	m->op->stop(m, p);
 	return error;
@@ -345,6 +349,8 @@ int seq_path(struct seq_file *m,
 	if (m->count < m->size) {
 		char *s = m->buf + m->count;
 		char *p = d_path(dentry, mnt, s, m->size - m->count);
+		if (IS_ERR(p) && PTR_ERR(p) != -ENAMETOOLONG)
+			return 0;
 		if (!IS_ERR(p)) {
 			while (s <= p) {
 				char c = *p++;
diff -upr linux-2.6.16.46-0.12.orig/fs/simfs.c linux-2.6.16.46-0.12-027test011/fs/simfs.c
--- linux-2.6.16.46-0.12.orig/fs/simfs.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/simfs.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,329 @@
+/*
+ *  fs/simfs.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/namei.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/vzquota.h>
+#include <linux/statfs.h>
+#include <linux/virtinfo.h>
+#include <linux/faudit.h>
+#include <linux/genhd.h>
+#include <linux/reiserfs_fs.h>
+
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+
+#define SIMFS_GET_LOWER_FS_SB(sb) ((sb)->s_root->d_sb)	/* sb owning the root dentry */
+
+static struct super_operations sim_super_ops;
+
+static int sim_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		struct kstat *stat)
+{
+	struct super_block *sb;
+	struct inode *inode;
+
+	inode = dentry->d_inode;
+	if (!inode->i_op->getattr) {	/* no fs-specific hook: generic fill */
+		generic_fillattr(inode, stat);
+		if (!stat->blksize) {	/* derive block figures from the sb */
+			unsigned blocks;
+
+			sb = inode->i_sb;
+			blocks = (stat->size + sb->s_blocksize-1) >>
+				sb->s_blocksize_bits;	/* size rounded up to blocks */
+			stat->blocks = (sb->s_blocksize / 512) * blocks;	/* in 512-byte units */
+			stat->blksize = sb->s_blocksize;
+		}
+	} else {
+		int err;
+
+		err = inode->i_op->getattr(mnt, dentry, stat);
+		if (err)
+			return err;
+	}
+
+	sb = mnt->mnt_sb;
+	if (sb->s_op == &sim_super_ops)	/* simfs mount: report its own s_dev */
+		stat->dev = sb->s_dev;
+	return 0;
+}
+
+static void quota_get_stat(struct super_block *sb, struct kstatfs *buf)
+{
+	int err;
+	struct dq_stat qstat;
+	struct virt_info_quota q;
+	long free_file, adj_file;
+	s64 blk, free_blk, adj_blk;
+	int bsize_bits;
+
+	q.super = sb;
+	q.qstat = &qstat;
+	err = virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_GETSTAT, &q);
+	if (err != NOTIFY_OK)	/* notifier did not return NOTIFY_OK: buf stays as is */
+		return;
+
+	bsize_bits = ffs(buf->f_bsize) - 1;	/* assumes f_bsize is a power of two - TODO confirm */
+	free_blk = (s64)(qstat.bsoftlimit - qstat.bcurrent) >> bsize_bits;
+	if (free_blk < 0)	/* usage is above the soft limit */
+		free_blk = 0;
+	/*
+	 * In the regular case, we always set buf->f_bfree and buf->f_blocks to
+	 * the values reported by quota.  In case of real disk space shortage,
+	 * we adjust the values.  We want this adjustment to look as if the
+	 * total disk space were reduced, not as if the usage were increased.
+	 *    -- SAW
+	 */
+	adj_blk = 0;
+	if (buf->f_bfree < free_blk)	/* lower fs has less free than quota allows */
+		adj_blk = free_blk - buf->f_bfree;
+	buf->f_bfree = (long)(free_blk - adj_blk);
+
+	if (free_blk < buf->f_bavail)
+		buf->f_bavail = (long)free_blk; /* min(f_bavail, free_blk) */
+
+	blk = (qstat.bsoftlimit >> bsize_bits) - adj_blk;
+	buf->f_blocks = blk > LONG_MAX ? LONG_MAX : blk;	/* clamp to LONG_MAX */
+
+	free_file = qstat.isoftlimit - qstat.icurrent;
+	if (free_file < 0)
+		free_file = 0;
+	if (buf->f_type == REISERFS_SUPER_MAGIC)
+		/*
+		 * reiserfs doesn't initialize f_ffree and f_files values of
+		 * kstatfs because it doesn't have an inode limit.
+		 */
+		buf->f_ffree = free_file;
+	adj_file = 0;	/* same adjustment as for blocks, applied to the file counts */
+	if (buf->f_ffree < free_file)
+		adj_file = free_file - buf->f_ffree;
+	buf->f_ffree = free_file - adj_file;
+	buf->f_files = qstat.isoftlimit - adj_file;
+}
+
+/* statfs for a simfs sb: lower fs figures adjusted by quota_get_stat(). */
+static int sim_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	struct kstatfs lower;
+	struct super_block *lsb;
+	int err = -ENOSYS;
+
+	if (sb->s_op != &sim_super_ops)	/* not a simfs sb: @buf untouched */
+		return 0;
+
+	memset(&lower, 0, sizeof(lower));
+	lsb = SIMFS_GET_LOWER_FS_SB(sb);
+
+	if (lsb && lsb->s_op && lsb->s_op->statfs)
+		err = lsb->s_op->statfs(lsb, &lower);
+	if (err)	/* incl. -ENOSYS when the lower fs has no statfs */
+		return err;
+
+	quota_get_stat(sb, &lower);
+
+	/* only these counters are copied; other @buf fields stay as passed in */
+	buf->f_files    = lower.f_files;
+	buf->f_ffree    = lower.f_ffree;
+	buf->f_blocks   = lower.f_blocks;
+	buf->f_bfree    = lower.f_bfree;
+	buf->f_bavail   = lower.f_bavail;
+	return 0;
+}
+
+static int sim_systemcall(struct vnotifier_block *me, unsigned long n,
+		void *d, int old_ret)
+{
+	int err;
+
+	switch (n) {
+	case VIRTINFO_FAUDIT_STAT: {	/* d is a struct faudit_stat_arg */
+		struct faudit_stat_arg *arg;
+
+		arg = (struct faudit_stat_arg *)d;
+		err = sim_getattr(arg->mnt, arg->dentry, arg->stat);
+		arg->err = err;	/* result is handed back through the argument */
+		}
+		break;
+	case VIRTINFO_FAUDIT_STATFS: {	/* d is a struct faudit_statfs_arg */
+		struct faudit_statfs_arg *arg;
+
+		arg = (struct faudit_statfs_arg *)d;
+		err = sim_statfs(arg->sb, arg->stat);
+		arg->err = err;
+		}
+		break;
+	default:	/* not an event handled here: pass the previous verdict on */
+		return old_ret;
+	}
+	return (err ? NOTIFY_BAD : NOTIFY_OK);
+}
+
+static struct inode *sim_quota_root(struct super_block *sb)
+{
+	return sb->s_root->d_inode;	/* inode of the dentry stored by sim_fill_super() */
+}
+
+/*
+ * NOTE: We need to set up the s_bdev field of the super block, since
+ * sys_quotactl() does lookup_bdev() and get_super(), which compare
+ * sb->s_bdev.  This is a MUST if we want an unmodified sys_quotactl()
+ * to work correctly on /dev/simfs inside a VE.
+ */
+static int sim_init_blkdev(struct super_block *sb)
+{
+	static struct hd_struct fake_hd;	/* single instance, shared by all callers */
+	struct block_device *blkdev;
+
+	blkdev = bdget(sb->s_dev);
+	if (blkdev == NULL)	/* s_bdev is left unset on this path */
+		return -ENOMEM;
+
+	blkdev->bd_part = &fake_hd;	/* required for bdev_read_only() */
+	sb->s_bdev = blkdev;
+
+	return 0;
+}
+
+static void sim_free_blkdev(struct super_block *sb)
+{
+	/* undo sim_init_blkdev(): set bd_part back to NULL, then bdput() */
+	sb->s_bdev->bd_part = NULL;	/* NOTE(review): assumes s_bdev != NULL */
+	bdput(sb->s_bdev);
+}
+
+static void sim_quota_init(struct super_block *sb)
+{
+	struct virt_info_quota viq;
+
+	viq.super = sb;	/* only .super is filled in for this event */
+	virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_ON, &viq);	/* return value ignored */
+}
+
+static void sim_quota_free(struct super_block *sb)
+{
+	struct virt_info_quota viq;
+
+	viq.super = sb;	/* only .super is filled in for this event */
+	virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_OFF, &viq);	/* return value ignored */
+}
+
+static struct super_operations sim_super_ops = {
+	.get_quota_root	= sim_quota_root,	/* sole method; &sim_super_ops also marks an sb as simfs */
+};
+
+/* sget() "set" callback: bind @s to the path carried in @data (a nameidata). */
+static int sim_fill_super(struct super_block *s, void *data)
+{
+	int err;
+	struct nameidata *nd;
+
+	err = set_anon_super(s, NULL);	/* assigns s->s_dev */
+	if (err)
+		return err;
+
+	/* hold the looked-up mount and dentry; released in sim_kill_sb() */
+	nd = data;
+	s->s_fs_info = mntget(nd->mnt);
+	s->s_root = dget(nd->dentry);
+	s->s_op = &sim_super_ops;
+	return 0;
+}
+
+struct super_block *sim_get_sb(struct file_system_type *type,
+		int flags, const char *dev_name, void *opt)
+{
+	int err;
+	struct nameidata nd;
+	struct super_block *sb;
+
+	sb = ERR_PTR(-EINVAL);
+	if (opt == NULL)	/* opt is looked up as a path below, so it is required */
+		goto out;
+
+	err = path_lookup(opt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+	sb = ERR_PTR(err);
+	if (err)
+		goto out;
+
+	sb = sget(type, NULL, sim_fill_super, &nd);	/* sim_fill_super() takes its own refs on nd */
+	if (IS_ERR(sb))
+		goto out_path;
+
+	err = sim_init_blkdev(sb);
+	if (err)
+		goto out_killsb;
+
+	sim_quota_init(sb);
+out_path:	/* reached on success too: drop the lookup's references */
+	path_release(&nd);
+out:
+	return sb;
+
+out_killsb:
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+	sb = ERR_PTR(-ENODEV);	/* NOTE(review): err from sim_init_blkdev() is not propagated */
+	goto out_path;
+}
+
+static void sim_kill_sb(struct super_block *sb)
+{
+	dput(sb->s_root);	/* drop the refs taken in sim_fill_super() */
+	sb->s_root = NULL;
+	mntput((struct vfsmount *)(sb->s_fs_info));
+
+	sim_quota_free(sb);
+	if (sb->s_bdev != NULL)	/* unset if sim_init_blkdev() failed */
+		sim_free_blkdev(sb);
+	kill_anon_super(sb);
+}
+
+static struct file_system_type sim_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "simfs",
+	.get_sb		= sim_get_sb,	/* mount data is the path to expose */
+	.kill_sb	= sim_kill_sb,
+};
+
+static struct vnotifier_block sim_syscalls = {
+	.notifier_call = sim_systemcall,	/* registered for VITYPE_FAUDIT in init_simfs() */
+};
+
+static int __init init_simfs(void)
+{
+	int err;
+
+	err = register_filesystem(&sim_fs_type);
+	if (err)
+		return err;
+
+	virtinfo_notifier_register(VITYPE_FAUDIT, &sim_syscalls);	/* hooks sim_systemcall() in */
+	return 0;
+}
+
+static void __exit exit_simfs(void)
+{
+	virtinfo_notifier_unregister(VITYPE_FAUDIT, &sim_syscalls);	/* reverse order of init_simfs() */
+	unregister_filesystem(&sim_fs_type);
+}
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Open Virtuozzo Simulation of File System");
+MODULE_LICENSE("GPL v2");
+
+module_init(init_simfs);
+module_exit(exit_simfs);
diff -upr linux-2.6.16.46-0.12.orig/fs/smbfs/inode.c linux-2.6.16.46-0.12-027test011/fs/smbfs/inode.c
--- linux-2.6.16.46-0.12.orig/fs/smbfs/inode.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/smbfs/inode.c	2007-08-28 17:35:29.000000000 +0400
@@ -234,7 +234,7 @@ smb_invalidate_inodes(struct smb_sb_info
 {
 	VERBOSE("\n");
 	shrink_dcache_sb(SB_of(server));
-	invalidate_inodes(SB_of(server));
+	invalidate_inodes(SB_of(server), 0);
 }
 
 /*
diff -upr linux-2.6.16.46-0.12.orig/fs/smbfs/sock.c linux-2.6.16.46-0.12-027test011/fs/smbfs/sock.c
--- linux-2.6.16.46-0.12.orig/fs/smbfs/sock.c	2007-08-24 19:28:11.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/smbfs/sock.c	2007-08-28 17:35:29.000000000 +0400
@@ -119,6 +119,7 @@ smb_close_socket(struct smb_sb_info *ser
 
 		VERBOSE("closing socket %p\n", sock);
 		sock->sk->sk_data_ready = server->data_ready;
+		sock->sk->sk_user_data = NULL;
 		server->sock_file = NULL;
 		fput(file);
 	}
diff -upr linux-2.6.16.46-0.12.orig/fs/stat.c linux-2.6.16.46-0.12-027test011/fs/stat.c
--- linux-2.6.16.46-0.12.orig/fs/stat.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/stat.c	2007-08-28 17:35:33.000000000 +0400
@@ -15,6 +15,7 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/faudit.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -42,11 +43,19 @@ int vfs_getattr(struct vfsmount *mnt, st
 {
 	struct inode *inode = dentry->d_inode;
 	int retval;
+	struct faudit_stat_arg arg;
 
 	retval = security_inode_getattr(mnt, dentry);
 	if (retval)
 		return retval;
 
+	arg.mnt = mnt;
+	arg.dentry = dentry;
+	arg.stat = stat;
+	if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STAT, &arg)
+			!= NOTIFY_DONE)
+		return arg.err;
+
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(mnt, dentry, stat);
 
diff -upr linux-2.6.16.46-0.12.orig/fs/super.c linux-2.6.16.46-0.12-027test011/fs/super.c
--- linux-2.6.16.46-0.12.orig/fs/super.c	2007-08-24 19:28:30.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/super.c	2007-08-28 17:35:33.000000000 +0400
@@ -37,6 +37,7 @@
 #include <linux/writeback.h>		/* for the emergency remount stuff */
 #include <linux/idr.h>
 #include <linux/kobject.h>
+#include <linux/ve_proto.h>
 #include <asm/uaccess.h>
 
 
@@ -45,7 +46,9 @@ void put_filesystem(struct file_system_t
 struct file_system_type *get_fs_type(const char *name);
 
 LIST_HEAD(super_blocks);
+EXPORT_SYMBOL_GPL(super_blocks);
 DEFINE_SPINLOCK(sb_lock);
+EXPORT_SYMBOL_GPL(sb_lock);
 
 /**
  *	alloc_super	-	create new superblock
@@ -80,8 +83,6 @@ static struct super_block *alloc_super(v
 		sema_init(&s->s_dquot.dqio_sem, 1);
 		sema_init(&s->s_dquot.dqonoff_sem, 1);
 		init_rwsem(&s->s_dquot.dqptr_sem);
-		s->s_prunes = 0;
-		init_waitqueue_head(&s->s_wait_prunes);
 		init_waitqueue_head(&s->s_wait_unfrozen);
 		s->s_maxbytes = MAX_NON_LFS;
 		s->dq_op = sb_dquot_ops;
@@ -236,14 +237,14 @@ void generic_shutdown_super(struct super
 
 	if (root) {
 		sb->s_root = NULL;
-		shrink_dcache_anon(sb);
 		shrink_dcache_parent(root);
+		shrink_dcache_anon(sb);
 		dput(root);
 		fsync_super(sb);
 		lock_super(sb);
 		sb->s_flags &= ~MS_ACTIVE;
 		/* bad name - it should be evict_inodes() */
-		invalidate_inodes(sb);
+		invalidate_inodes(sb, 0);
 		lock_kernel();
 
 		if (sop->write_super && sb->s_dirt)
@@ -252,7 +253,7 @@ void generic_shutdown_super(struct super
 			sop->put_super(sb);
 
 		/* Forget any remaining inodes */
-		if (invalidate_inodes(sb)) {
+		if (invalidate_inodes(sb, 1)) {
 			printk("VFS: Busy inodes after unmount of %s. "
 			   "Self-destruct in 5 seconds.  Have a nice day...\n",
 			   sb->s_id);
@@ -481,17 +482,25 @@ rescan:
 	spin_unlock(&sb_lock);
 	return NULL;
 }
+EXPORT_SYMBOL(user_get_super);
 
 asmlinkage long sys_ustat(unsigned dev, struct ustat __user * ubuf)
 {
         struct super_block *s;
         struct ustat tmp;
         struct kstatfs sbuf;
-	int err = -EINVAL;
+	dev_t kdev;
+	int err;
+
+	kdev = new_decode_dev(dev);
+	err = get_device_perms_ve(S_IFBLK, kdev, FMODE_READ);
+	if (err)
+		goto out;
 
-        s = user_get_super(new_decode_dev(dev));
-        if (s == NULL)
-                goto out;
+	err = -EINVAL;
+	s = user_get_super(kdev);
+	if (s == NULL)
+		goto out;
 	err = vfs_statfs(s, &sbuf);
 	drop_super(s);
 	if (err)
@@ -615,6 +624,13 @@ void emergency_remount(void)
 static struct idr unnamed_dev_idr;
 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
 
+/* for compatibility with coreutils still unaware of new minor sizes */
+int unnamed_dev_majors[] = {
+	0, 144, 145, 146, 242, 243, 244, 245,
+	246, 247, 248, 249, 250, 251, 252, 253
+};
+EXPORT_SYMBOL(unnamed_dev_majors);
+
 int set_anon_super(struct super_block *s, void *data)
 {
 	int dev;
@@ -632,13 +648,13 @@ int set_anon_super(struct super_block *s
 	else if (error)
 		return -EAGAIN;
 
-	if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
+	if ((dev & MAX_ID_MASK) >= (1 << MINORBITS)) {
 		spin_lock(&unnamed_dev_lock);
 		idr_remove(&unnamed_dev_idr, dev);
 		spin_unlock(&unnamed_dev_lock);
 		return -EMFILE;
 	}
-	s->s_dev = MKDEV(0, dev & MINORMASK);
+	s->s_dev = make_unnamed_dev(dev);
 	return 0;
 }
 
@@ -646,8 +662,9 @@ EXPORT_SYMBOL(set_anon_super);
 
 void kill_anon_super(struct super_block *sb)
 {
-	int slot = MINOR(sb->s_dev);
+	int slot;
 
+	slot = unnamed_dev_idx(sb->s_dev);
 	generic_shutdown_super(sb);
 	spin_lock(&unnamed_dev_lock);
 	idr_remove(&unnamed_dev_idr, slot);
diff -upr linux-2.6.16.46-0.12.orig/fs/sysfs/bin.c linux-2.6.16.46-0.12-027test011/fs/sysfs/bin.c
--- linux-2.6.16.46-0.12.orig/fs/sysfs/bin.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/sysfs/bin.c	2007-08-28 17:35:31.000000000 +0400
@@ -120,6 +120,9 @@ static int open(struct inode * inode, st
 	struct bin_attribute * attr = to_bin_attr(file->f_dentry);
 	int error = -EINVAL;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	if (!kobj || !attr)
 		goto Done;
 
@@ -196,6 +199,9 @@ int sysfs_create_bin_file(struct kobject
 
 int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	sysfs_hash_and_remove(kobj->dentry,attr->attr.name);
 	return 0;
 }
diff -upr linux-2.6.16.46-0.12.orig/fs/sysfs/dir.c linux-2.6.16.46-0.12-027test011/fs/sysfs/dir.c
--- linux-2.6.16.46-0.12.orig/fs/sysfs/dir.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/sysfs/dir.c	2007-08-28 17:35:31.000000000 +0400
@@ -174,6 +174,9 @@ int sysfs_create_dir(struct kobject * ko
 	struct dentry * parent;
 	int error = 0;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	BUG_ON(!kobj);
 
 	if (kobj->parent)
@@ -308,10 +311,14 @@ void sysfs_remove_subdir(struct dentry *
 
 void sysfs_remove_dir(struct kobject * kobj)
 {
-	struct dentry * dentry = dget(kobj->dentry);
+	struct dentry * dentry;
 	struct sysfs_dirent * parent_sd;
 	struct sysfs_dirent * sd, * tmp;
 
+	if (!ve_sysfs_alowed())
+		return;
+
+	dentry = dget(kobj->dentry);
 	if (!dentry)
 		return;
 
@@ -340,6 +347,9 @@ int sysfs_rename_dir(struct kobject * ko
 	int error = 0;
 	struct dentry * new_dentry, * parent;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	if (!strcmp(kobject_name(kobj), new_name))
 		return -EINVAL;
 
diff -upr linux-2.6.16.46-0.12.orig/fs/sysfs/file.c linux-2.6.16.46-0.12-027test011/fs/sysfs/file.c
--- linux-2.6.16.46-0.12.orig/fs/sysfs/file.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/sysfs/file.c	2007-08-28 17:35:34.000000000 +0400
@@ -332,7 +332,7 @@ static int sysfs_open_file(struct inode 
 {
 	char *p = d_path(filp->f_dentry, sysfs_mount, last_sysfs_file,
 			sizeof(last_sysfs_file));
-	if (p)
+	if (!IS_ERR(p))
 		memmove(last_sysfs_file, p, strlen(p) + 1);
 	return check_perm(inode,filp);
 }
@@ -395,6 +395,9 @@ int sysfs_add_file(struct dentry * dir, 
 
 int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
 {
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	BUG_ON(!kobj || !kobj->dentry || !attr);
 
 	return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR);
@@ -413,6 +416,9 @@ int sysfs_update_file(struct kobject * k
 	struct dentry * victim;
 	int res = -ENOENT;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	mutex_lock(&dir->d_inode->i_mutex);
 	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
 	if (!IS_ERR(victim)) {
@@ -483,6 +489,9 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
 void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 {
+	if (!ve_sysfs_alowed())
+		return;
+
 	sysfs_hash_and_remove(kobj->dentry,attr->name);
 }
 
diff -upr linux-2.6.16.46-0.12.orig/fs/sysfs/group.c linux-2.6.16.46-0.12-027test011/fs/sysfs/group.c
--- linux-2.6.16.46-0.12.orig/fs/sysfs/group.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/sysfs/group.c	2007-08-28 17:35:31.000000000 +0400
@@ -46,6 +46,9 @@ int sysfs_create_group(struct kobject * 
 	struct dentry * dir;
 	int error;
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	BUG_ON(!kobj || !kobj->dentry);
 
 	if (grp->name) {
@@ -68,6 +71,9 @@ void sysfs_remove_group(struct kobject *
 {
 	struct dentry * dir;
 
+	if (!ve_sysfs_alowed())
+		return;
+
 	if (grp->name)
 		dir = lookup_one_len(grp->name, kobj->dentry,
 				strlen(grp->name));
diff -upr linux-2.6.16.46-0.12.orig/fs/sysfs/inode.c linux-2.6.16.46-0.12-027test011/fs/sysfs/inode.c
--- linux-2.6.16.46-0.12.orig/fs/sysfs/inode.c	2007-08-24 19:28:06.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/sysfs/inode.c	2007-08-28 17:35:31.000000000 +0400
@@ -8,14 +8,13 @@
 
 #undef DEBUG 
 
+#include <linux/config.h>
 #include <linux/pagemap.h>
 #include <linux/namei.h>
 #include <linux/backing-dev.h>
 #include <linux/capability.h>
 #include "sysfs.h"
 
-extern struct super_block * sysfs_sb;
-
 static struct address_space_operations sysfs_aops = {
 	.readpage	= simple_readpage,
 	.prepare_write	= simple_prepare_write,
diff -upr linux-2.6.16.46-0.12.orig/fs/sysfs/mount.c linux-2.6.16.46-0.12-027test011/fs/sysfs/mount.c
--- linux-2.6.16.46-0.12.orig/fs/sysfs/mount.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/fs/sysfs/mount.c	2007-08-28 17:35:31.000000000 +0400
@@ -7,6 +7,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pagemap.h>
+#include <linux/module.h>
 #include <linux/init.h>
 
 #include "sysfs.h"
@@ -14,8 +15,11 @@
 /* Random magic number */
 #define SYSFS_MAGIC 0x62656572
 
+#ifndef CONFIG_VE
 struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
+#endif
+
 kmem_cache_t *sysfs_dir_cachep;
 
 static struct super_operations sysfs_ops = {
@@ -31,6 +35,15 @@ static struct sysfs_dirent sysfs_root = 
 	.s_iattr	= NULL,
 };
 
+#ifdef CONFIG_VE
+static void init_ve0_sysfs_root(void)
+{
+	get_ve0()->sysfs_root = &sysfs_root;
+}
+
+#define sysfs_root (*(get_exec_env()->sysfs_root))
+#endif
+
 static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode;
@@ -72,16 +85,21 @@ static struct super_block *sysfs_get_sb(
 	return get_sb_single(fs_type, flags, data, sysfs_fill_super);
 }
 
-static struct file_system_type sysfs_fs_type = {
+struct file_system_type sysfs_fs_type = {
 	.name		= "sysfs",
 	.get_sb		= sysfs_get_sb,
 	.kill_sb	= kill_litter_super,
 };
 
+EXPORT_SYMBOL(sysfs_fs_type);
+
 int __init sysfs_init(void)
 {
 	int err = -ENOMEM;
 
+#ifdef CONFIG_VE
+	init_ve0_sysfs_root();
+#endif
 	sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache",
 					      sizeof(struct sysfs_dirent),
 					      0, 0, NULL, NULL);
diff -upr linux-2.6.16.46-0.12.orig/fs/sysfs/symlink.c linux-2.6.16.46-0.12-027test011/fs/sysfs/symlink.c
--- linux-2.6.16.46-0.12.orig/fs/sysfs/symlink.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/sysfs/symlink.c	2007-08-28 17:35:31.000000000 +0400
@@ -87,6 +87,9 @@ int sysfs_create_link(struct kobject * k
 
 	BUG_ON(!kobj || !kobj->dentry || !name);
 
+	if (!ve_sysfs_alowed())
+		return 0;
+
 	mutex_lock(&dentry->d_inode->i_mutex);
 	if (!sysfs_dirent_exist(dentry->d_fsdata, name))
 		error = sysfs_add_link(dentry, name, target);
@@ -103,6 +106,9 @@ int sysfs_create_link(struct kobject * k
 
 void sysfs_remove_link(struct kobject * kobj, const char * name)
 {
+	if(!ve_sysfs_alowed())
+		return;
+
 	sysfs_hash_and_remove(kobj->dentry,name);
 }
 
diff -upr linux-2.6.16.46-0.12.orig/fs/sysfs/sysfs.h linux-2.6.16.46-0.12-027test011/fs/sysfs/sysfs.h
--- linux-2.6.16.46-0.12.orig/fs/sysfs/sysfs.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/sysfs/sysfs.h	2007-08-28 17:35:31.000000000 +0400
@@ -1,5 +1,14 @@
 
-extern struct vfsmount * sysfs_mount;
+#ifndef CONFIG_VE
+extern struct vfsmount *sysfs_mount;
+extern struct super_block *sysfs_sb;
+#define ve_sysfs_alowed()	(1)
+#else
+#define sysfs_mount		(get_exec_env()->sysfs_mnt)
+#define sysfs_sb		(get_exec_env()->sysfs_sb)
+#define ve_sysfs_alowed()	(sysfs_sb != NULL)
+#endif
+
 extern kmem_cache_t *sysfs_dir_cachep;
 
 extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
@@ -20,7 +29,6 @@ extern void sysfs_drop_dentry(struct sys
 extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
 
 extern struct rw_semaphore sysfs_rename_sem;
-extern struct super_block * sysfs_sb;
 extern struct file_operations sysfs_dir_operations;
 extern struct file_operations sysfs_file_operations;
 extern struct file_operations bin_fops;
diff -upr linux-2.6.16.46-0.12.orig/fs/vzdq_file.c linux-2.6.16.46-0.12-027test011/fs/vzdq_file.c
--- linux-2.6.16.46-0.12.orig/fs/vzdq_file.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/vzdq_file.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,892 @@
+/*
+ *
+ * Copyright (C) 2005 SWsoft
+ * All rights reserved.
+ *
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo quota files as proc entry implementation.
+ * It is required for std quota tools to work correctly as they are expecting
+ * aquota.user and aquota.group files.
+ */
+
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+#include <linux/mount.h>
+#include <linux/namespace.h>
+#include <linux/quotaio_v2.h>
+#include <asm/uaccess.h>
+
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/vzdq_tree.h>
+#include <linux/vzquota.h>
+
+/* ----------------------------------------------------------------------
+ *
+ * File read operation
+ *
+ * FIXME: functions in this section (as well as many functions in vzdq_ugid.c,
+ * perhaps) abuse vz_quota_sem.
+ * Taking a global semaphore for lengthy and user-controlled operations inside
+ * VPSs is not a good idea in general.
+ * In this case, the reasons for taking this semaphore are completely unclear,
+ * especially taking into account that the only function that has comments
+ * about the necessity to be called under this semaphore
+ * (create_proc_quotafile) is actually called OUTSIDE it.
+ *
+ * --------------------------------------------------------------------- */
+
+#define DQBLOCK_SIZE		1024
+#define DQUOTBLKNUM		21U
+#define DQTREE_DEPTH		4
+#define TREENUM_2_BLKNUM(num)	(((num) + 1) << 1)
+#define ISINDBLOCK(num)		((num)%2 != 0)
+#define FIRST_DATABLK	  	2  /* first even number */
+#define LAST_IND_LEVEL		(DQTREE_DEPTH - 1)
+#define CONVERT_LEVEL(level)	((level) * (QUOTAID_EBITS/QUOTAID_BBITS))
+#define GETLEVINDX(ind, lev)	(((ind) >> QUOTAID_BBITS*(lev)) \
+					& QUOTATREE_BMASK)
+
+#if (QUOTAID_EBITS / QUOTAID_BBITS) != (QUOTATREE_DEPTH / DQTREE_DEPTH)
+#error xBITS and DQTREE_DEPTH does not correspond
+#endif
+
+#define BLOCK_NOT_FOUND	1
+
+/* data for quota file -- one per proc entry */
+struct quotatree_data {
+	struct list_head	list;
+	struct vz_quota_master	*qmblk;	/* master block owning the tree */
+	int			type;	/* type of the tree: USRQUOTA or GRPQUOTA */
+};
+
+/* serialized by vz_quota_sem */
+static LIST_HEAD(qf_data_head);
+
+static const u_int32_t vzquota_magics[] = V2_INITQMAGICS;
+static const u_int32_t vzquota_versions[] = V2_INITQVERSIONS;
+
+/* First file block number used by index blocks of tree depth @depth */
+static inline loff_t get_depoff(int depth)
+{
+	loff_t first;
+
+	for (first = 1; depth != 0; depth--)
+		first += (1 << ((depth - 1)*QUOTAID_EBITS + 1));
+	return first;
+}
+
+/* File block number of index node @num at tree depth @depth */
+static inline loff_t get_blknum(loff_t num, int depth)
+{
+	/* nodes of one depth occupy every second block from get_depoff() on */
+	return get_depoff(depth) + (num << 1);
+}
+
+/* Tree depth that index block @num belongs to, or -1 when no depth matches */
+static int get_depth(loff_t num)
+{
+	int d;
+	for (d = 0; d < DQTREE_DEPTH; d++)
+		if (num >= get_depoff(d) &&
+		    (d + 1 == DQTREE_DEPTH || num < get_depoff(d + 1)))
+			return d;
+	return -1;
+}
+
+static inline loff_t get_offset(loff_t num)
+{
+	loff_t depth = get_depth(num);
+	loff_t rel;
+
+	/* get_depth() found no depth for this block number */
+	if (depth < 0)
+		return -1;
+	rel = num - get_depoff(depth);	/* blocks past the depth's start */
+	BUG_ON(rel < 0);
+
+	return rel >> 1;
+}
+
+static inline loff_t get_quot_blk_num(struct quotatree_tree *tree, int level)
+{
+	/* freenum of @level is used here as the maximum available block num */
+	return tree->levels[level].freenum;
+}
+
+static inline loff_t get_block_num(struct quotatree_tree *tree)
+{
+	loff_t ind_blk_num, quot_blk_num, max_ind, max_quot;
+
+	quot_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH) - 1);
+	max_quot = TREENUM_2_BLKNUM(quot_blk_num);
+	ind_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH - 1));
+	max_ind = (quot_blk_num) ? get_blknum(ind_blk_num, LAST_IND_LEVEL)
+		: get_blknum(ind_blk_num, 0);
+	/* file length in blocks: one past the larger of the two maxima */
+	return (max_ind > max_quot) ? max_ind + 1 : max_quot + 1;
+}
+
+/* Fill @buf with the quota file header and info structures; returns 0 */
+static int read_header(void *buf, struct quotatree_tree *tree,
+	struct dq_info *dq_ugid_info, int type)
+{
+	struct v2_disk_dqheader *dqh;
+	struct v2_disk_dqinfo *dq_disk_info;
+
+	dqh = buf;
+	dq_disk_info = buf + sizeof(struct v2_disk_dqheader);
+
+	dqh->dqh_magic = vzquota_magics[type];
+	dqh->dqh_version = vzquota_versions[type];
+	/* note: @dq_ugid_info is indexed by @type, i.e. used as an array base */
+	dq_disk_info->dqi_bgrace = dq_ugid_info[type].bexpire;
+	dq_disk_info->dqi_igrace = dq_ugid_info[type].iexpire;
+	dq_disk_info->dqi_flags = 0;	/* no flags */
+	dq_disk_info->dqi_blocks = get_block_num(tree);
+	dq_disk_info->dqi_free_blk = 0;	/* first block in the file */
+	dq_disk_info->dqi_free_entry = FIRST_DATABLK;
+
+	return 0;
+}
+
+static int get_block_child(int depth, struct quotatree_node *p, u_int32_t *buf)
+{
+	int i, j, lev_num;
+
+	lev_num = QUOTATREE_DEPTH/DQTREE_DEPTH - 1;
+	for (i = 0; i < BLOCK_SIZE/sizeof(u_int32_t); i++) {
+		struct quotatree_node *next, *parent;
+		/* walk the in-memory levels that make up file index slot i */
+		parent = p;
+		next = p;
+		for (j = lev_num; j >= 0; j--) {
+			if (!next->blocks[GETLEVINDX(i,j)]) {
+				buf[i] = 0;	/* nothing below this slot */
+				goto bad_branch;
+			}
+			parent = next;
+			next = next->blocks[GETLEVINDX(i,j)];
+		}
+		buf[i] = (depth == DQTREE_DEPTH - 1) ?	/* last index level? */
+			TREENUM_2_BLKNUM(parent->num)	/* data block */
+			: get_blknum(next->num, depth + 1);	/* index blk */
+
+	bad_branch:
+		;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill @buf (256*sizeof(u_int32_t) bytes) with index block @num of @tree.
+ * Returns -EINVAL for a bad @num, else BLOCK_NOT_FOUND or 0 (see the loop).
+ */
+static int read_index_block(int num, u_int32_t *buf,
+		struct quotatree_tree *tree)
+{
+	struct quotatree_node *p;
+	u_int32_t index;
+	loff_t off;
+	int depth, res;
+
+	res = BLOCK_NOT_FOUND;
+	index = 0;
+	depth = get_depth(num);
+	off = get_offset(num);
+	if (depth < 0 || off < 0)
+		return -EINVAL;
+
+	list_for_each_entry(p, &tree->levels[CONVERT_LEVEL(depth)].usedlh,
+			list) {
+		if (p->num >= off)
+			res = 0;	/* a node at or past @off exists */
+		if (p->num != off)
+			continue;
+		get_block_child(depth, p, buf);	/* exact match: dump it */
+		break;
+	}
+
+	return res;
+}
+
+static inline void convert_quot_format(struct v2_disk_dqblk *dq,
+		struct vz_quota_ugid *vzq)
+{
+	dq->dqb_id = vzq->qugid_id;
+	dq->dqb_ihardlimit = vzq->qugid_stat.ihardlimit;
+	dq->dqb_isoftlimit = vzq->qugid_stat.isoftlimit;
+	dq->dqb_curinodes = vzq->qugid_stat.icurrent;
+	dq->dqb_bhardlimit = vzq->qugid_stat.bhardlimit / QUOTABLOCK_SIZE;
+	dq->dqb_bsoftlimit = vzq->qugid_stat.bsoftlimit / QUOTABLOCK_SIZE;
+	dq->dqb_curspace = vzq->qugid_stat.bcurrent;	/* copied unscaled */
+	dq->dqb_btime = vzq->qugid_stat.btime;
+	dq->dqb_itime = vzq->qugid_stat.itime;
+}
+
+static int read_dquot(loff_t num, void *buf, struct quotatree_tree *tree)
+{
+	int res, i, entries = 0;
+	struct v2_disk_dqdbheader *dq_header;
+	struct quotatree_node *p;
+	struct v2_disk_dqblk *blk = buf + sizeof(struct v2_disk_dqdbheader);
+
+	res = BLOCK_NOT_FOUND;
+	dq_header = buf;
+	memset(dq_header, 0, sizeof(*dq_header));
+	/* find the leaf node whose file block number is @num and dump it */
+	list_for_each_entry(p, &(tree->levels[QUOTATREE_DEPTH - 1].usedlh),
+			list) {
+		if (TREENUM_2_BLKNUM(p->num) >= num)
+			res = 0;	/* a leaf at or past @num exists */
+		if (TREENUM_2_BLKNUM(p->num) != num)
+			continue;
+
+		for (i = 0; i < QUOTATREE_BSIZE; i++) {
+			if (!p->blocks[i])
+				continue;	/* empty slot */
+			convert_quot_format(blk + entries,
+					(struct vz_quota_ugid *)p->blocks[i]);
+			entries++;
+			res = 0;
+		}
+		break;
+	}
+	dq_header->dqdh_entries = entries;	/* count of packed entries */
+
+	return res;
+}
+
+/* Build block @num of the emulated quota file in @buf */
+static int read_block(int num, void *buf, struct quotatree_tree *tree,
+	struct dq_info *dq_ugid_info, int magic)
+{
+	/* start from zeroes; the readers below only fill in what exists */
+	memset(buf, 0, DQBLOCK_SIZE);
+
+	if (num == 0)		/* block 0 is the file header */
+		return read_header(buf, tree, dq_ugid_info, magic);
+	if (ISINDBLOCK(num))	/* odd numbers are index blocks */
+		return read_index_block(num, (u_int32_t*)buf, tree);
+
+	/* what remains are the even-numbered quota entry blocks */
+	return read_dquot(num, buf, tree);
+}
+
+/*
+ * Copy up to @count bytes at @off of the emulated quota file into @page.
+ * FIXME: this function can handle quota files up to 2GB only.
+ */
+static int read_proc_quotafile(char *page, char **start, off_t off, int count,
+		int *eof, void *data)
+{
+	off_t blk_num, blk_off, buf_off;
+	char *tmp;
+	size_t buf_size;
+	struct quotatree_data *qtd;
+	struct quotatree_tree *tree;
+	struct dq_info *dqi;
+	int res;
+
+	tmp = kmalloc(DQBLOCK_SIZE, GFP_KERNEL);	/* bounce buffer */
+	if (!tmp)
+		return -ENOMEM;
+
+	qtd = data;
+	down(&vz_quota_sem);
+	down(&qtd->qmblk->dq_sem);
+
+	res = 0;
+	tree = QUGID_TREE(qtd->qmblk, qtd->type);
+	if (!tree) {
+		*eof = 1;
+		goto out_dq;
+	}
+
+	/* read_header() indexes this array by type itself: pass its base */
+	dqi = qtd->qmblk->dq_ugid_info;
+	buf_off = 0;
+	buf_size = count;
+	blk_num = off / DQBLOCK_SIZE;
+	blk_off = off % DQBLOCK_SIZE;
+
+	while (buf_size > 0) {
+		off_t len = min((size_t)(DQBLOCK_SIZE-blk_off), buf_size);
+
+		res = read_block(blk_num, tmp, tree, dqi, qtd->type);
+		if (res < 0)
+			goto out_err;
+		if (res == BLOCK_NOT_FOUND) {	/* nothing left to read */
+			*eof = 1;
+			break;
+		}
+		memcpy(page + buf_off, tmp + blk_off, len);
+
+		blk_num++;
+		buf_size -= len;
+		blk_off = 0;	/* only the first block starts mid-block */
+		buf_off += len;
+	}
+	res = buf_off;	/* bytes placed in @page */
+
+out_err:
+	*start = NULL + count;	/* caller advances *ppos by this */
+out_dq:
+	up(&qtd->qmblk->dq_sem);
+	up(&vz_quota_sem);
+	kfree(tmp);
+
+	return res;	/* bytes copied, 0 at EOF, or -errno */
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota/QID/aquota.* files
+ *
+ * FIXME: this code lacks serialization of read/readdir/lseek.
+ * However, this problem should be fixed after the mainstream issue of what
+ * appears to be non-atomic read and update of file position in sys_read.
+ *
+ * --------------------------------------------------------------------- */
+
+static inline unsigned long vzdq_aquot_getino(dev_t dev)
+{
+	return 0xec000000UL + dev;	/* inode number base for @dev */
+}
+
+static inline dev_t vzdq_aquot_getidev(struct inode *inode)
+{
+	return (dev_t)(unsigned long)PROC_I(inode)->op.proc_get_link;
+}
+
+static inline void vzdq_aquot_setidev(struct inode *inode, dev_t dev)
+{
+	PROC_I(inode)->op.proc_get_link = (void *)(unsigned long)dev;
+}
+
+static ssize_t vzdq_aquotf_read(struct file *file,
+		char __user *buf, size_t size, loff_t *ppos)
+{
+	char *page;
+	size_t bufsize;
+	ssize_t l, l2, copied;
+	char *start;
+	struct inode *inode;
+	struct block_device *bdev;
+	struct super_block *sb;
+	struct quotatree_data data;
+	int eof, err;
+
+	err = -ENOMEM;
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (page == NULL)
+		goto out_err;
+
+	err = -ENODEV;
+	inode = file->f_dentry->d_inode;
+	bdev = bdget(vzdq_aquot_getidev(inode));
+	if (bdev == NULL)
+		goto out_err;
+	sb = get_super(bdev);
+	bdput(bdev);
+	if (sb == NULL)
+		goto out_err;
+	data.qmblk = vzquota_find_qmblk(sb);
+	data.type = PROC_I(inode)->type - 1;	/* stored as type + 1 */
+	drop_super(sb);
+	if (data.qmblk == NULL || data.qmblk == VZ_QUOTA_BAD)
+		goto out_err;
+	/* hand the file out to user space at most one page per round */
+	copied = 0;
+	l = l2 = 0;
+	while (1) {
+		bufsize = min(size, (size_t)PAGE_SIZE);
+		if (bufsize <= 0)
+			break;
+
+		l = read_proc_quotafile(page, &start, *ppos, bufsize,
+				&eof, &data);
+		if (l <= 0)
+			break;
+
+		l2 = copy_to_user(buf, page, l);	/* bytes NOT copied */
+		copied += l - l2;
+		if (l2)
+			break;
+
+		buf += l;
+		size -= l;
+		*ppos += (unsigned long)start;	/* step reported in *start */
+		l = l2 = 0;
+	}
+
+	qmblk_put(data.qmblk);	/* pairs with vzquota_find_qmblk() above */
+	free_page((unsigned long)page);
+	if (copied)
+		return copied;
+	else if (l2)		/* last copy_to_user failed */
+		return -EFAULT;
+	else			/* read error or EOF */
+		return l;
+
+out_err:
+	if (page != NULL)
+		free_page((unsigned long)page);
+	return err;
+}
+
+static struct file_operations vzdq_aquotf_file_operations = {
+	.read		= &vzdq_aquotf_read,
+};
+
+static struct inode_operations vzdq_aquotf_inode_operations = {
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota/QID directory
+ *
+ * --------------------------------------------------------------------- */
+
+/* readdir for a QID directory: ".", "..", "aquota.user", "aquota.group" */
+static int vzdq_aquotq_readdir(struct file *file, void *data, filldir_t filler)
+{
+	loff_t n = file->f_pos;
+	int err;
+
+	for (err = 0; !err; n++) {
+		/* ppc32 can't cmp 2 long long's in switch, calls __cmpdi2() */
+		switch ((unsigned long)n) {
+		case 0:
+			err = (*filler)(data, ".", 1, n,
+				file->f_dentry->d_inode->i_ino, DT_DIR);
+			break;
+		case 1:
+			err = (*filler)(data, "..", 2, n,
+					parent_ino(file->f_dentry), DT_DIR);
+			break;
+		case 2:
+			err = (*filler)(data, "aquota.user", 11, n,
+					file->f_dentry->d_inode->i_ino
+								+ USRQUOTA + 1,
+					DT_REG);
+			break;
+		case 3:
+			err = (*filler)(data, "aquota.group", 12, n,
+					file->f_dentry->d_inode->i_ino
+								+ GRPQUOTA + 1,
+					DT_REG);
+			break;
+		default:
+			goto out;
+		}
+	}
+	n--;	/* filler rejected entry n: undo the step so it is retried */
+out:
+	file->f_pos = n;
+	return 0;	/* a full buffer is not an error for the caller */
+}
+
+struct vzdq_aquotq_lookdata {
+	dev_t dev;	/* device the quota file belongs to */
+	int type;	/* USRQUOTA or GRPQUOTA */
+	struct vz_quota_master *qmblk;	/* used by lookset to size the file */
+};
+
+static int vzdq_aquotq_looktest(struct inode *inode, void *data)
+{
+	struct vzdq_aquotq_lookdata *want = data;
+
+	if (inode->i_op != &vzdq_aquotf_inode_operations)
+		return 0;	/* not an aquota.* file inode at all */
+	return vzdq_aquot_getidev(inode) == want->dev &&
+	       PROC_I(inode)->type == want->type + 1;
+}
+
+/* iget5_locked() "set" callback: initialise a new aquota.* file inode */
+static int vzdq_aquotq_lookset(struct inode *inode, void *data)
+{
+	struct vzdq_aquotq_lookdata *d = data;
+	struct quotatree_tree *tree;
+
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_ino = vzdq_aquot_getino(d->dev) + d->type + 1;
+	inode->i_mode = S_IFREG | S_IRUSR;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+	inode->i_nlink = 1;
+	inode->i_op = &vzdq_aquotf_inode_operations;
+	inode->i_fop = &vzdq_aquotf_file_operations;
+	PROC_I(inode)->type = d->type + 1;	/* 0 is never a valid type */
+	vzdq_aquot_setidev(inode, d->dev);
+
+	/* Size in bytes; a missing tree reads as an empty file */
+	tree = QUGID_TREE(d->qmblk, d->type);
+	inode->i_size = tree ? get_block_num(tree) * DQBLOCK_SIZE : 0;
+	return 0;
+}
+
+static int vzdq_aquotq_revalidate(struct dentry *vdentry, struct nameidata *nd)
+{
+	return 0;	/* 0: never valid, so every access re-runs lookup */
+}
+
+static struct dentry_operations vzdq_aquotq_dentry_operations = {
+	.d_revalidate	= &vzdq_aquotq_revalidate,
+};
+
+/* Map @dev to its quota master block; NULL when there is no usable one */
+static struct vz_quota_master *find_qmblk_by_dev(dev_t dev)
+{
+	struct vz_quota_master *master;
+	struct super_block *sb = user_get_super(dev);
+
+	if (sb == NULL)
+		return NULL;
+
+	/* the superblock is needed for the lookup only, drop it right after */
+	master = vzquota_find_qmblk(sb);
+	drop_super(sb);
+	if (master == VZ_QUOTA_BAD)
+		return NULL;
+
+	return master;
+}
+
+/* Look up "aquota.user" or "aquota.group" inside a QID directory */
+static struct dentry *vzdq_aquotq_lookup(struct inode *dir,
+		struct dentry *dentry, struct nameidata *nd)
+{
+	struct inode *inode;
+	struct vzdq_aquotq_lookdata d;
+	int k;
+
+	if (dentry->d_name.len == 11) {
+		if (memcmp(dentry->d_name.name, "aquota.user", 11))
+			goto out;
+		k = USRQUOTA;
+	} else if (dentry->d_name.len == 12) {
+		if (memcmp(dentry->d_name.name, "aquota.group", 12))
+			goto out;
+		k = GRPQUOTA;
+	} else
+		goto out;
+	d.dev = vzdq_aquot_getidev(dir);
+	d.type = k;
+	d.qmblk = find_qmblk_by_dev(d.dev);
+	if (d.qmblk == NULL)
+		goto out;
+
+	inode = iget5_locked(dir->i_sb, dir->i_ino + k + 1,
+			vzdq_aquotq_looktest, vzdq_aquotq_lookset, &d);
+
+	/* qmblk ref is not needed, we used it for i_size calculation only */
+	qmblk_put(d.qmblk);
+	if (inode == NULL)
+		goto out;
+	if (inode->i_state & I_NEW)	/* a cached inode is already unlocked */
+		unlock_new_inode(inode);
+	dentry->d_op = &vzdq_aquotq_dentry_operations;
+	d_add(dentry, inode);
+	return NULL;	/* dentry instantiated by d_add() */
+
+out:
+	return ERR_PTR(-ENOENT);
+}
+
+static struct file_operations vzdq_aquotq_file_operations = {
+	.read		= &generic_read_dir,
+	.readdir	= &vzdq_aquotq_readdir,
+};
+
+static struct inode_operations vzdq_aquotq_inode_operations = {
+	.lookup		= &vzdq_aquotq_lookup,
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * /proc/vz/vzaquota directory
+ *
+ * --------------------------------------------------------------------- */
+
+struct vzdq_aquot_de {
+	struct list_head list;
+	struct vfsmount *mnt;	/* holds a reference taken with mntget() */
+};
+
+static int vzdq_aquot_buildmntlist(struct ve_struct *ve,
+		struct list_head *head)
+{
+	struct vfsmount *rmnt, *mnt;
+	struct vzdq_aquot_de *p;
+	int err;
+
+#ifdef CONFIG_VE
+	rmnt = mntget(ve->fs_rootmnt);
+#else
+	read_lock(&current->fs->lock);
+	rmnt = mntget(current->fs->rootmnt);
+	read_unlock(&current->fs->lock);
+#endif
+	mnt = rmnt;	/* start the walk at the root mount */
+	spin_lock(&vfsmount_lock);
+	while (1) {
+		list_for_each_entry(p, head, list) {
+			if (p->mnt->mnt_sb == mnt->mnt_sb)
+				goto skip;	/* superblock already listed */
+		}
+		/* first mount seen for this superblock: record it */
+		err = -ENOMEM;
+		p = kmalloc(sizeof(*p), GFP_ATOMIC);	/* spinlock held */
+		if (p == NULL)
+			goto out;
+		p->mnt = mntget(mnt);
+		list_add_tail(&p->list, head);
+
+skip:
+		err = 0;
+		if (list_empty(&mnt->mnt_mounts)) {
+			while (1) {
+				if (mnt == rmnt)
+					goto out;	/* walk done */
+				if (mnt->mnt_child.next !=
+						&mnt->mnt_parent->mnt_mounts)
+					break;	/* a next sibling exists */
+				mnt = mnt->mnt_parent;	/* climb up */
+			}
+			mnt = list_entry(mnt->mnt_child.next,
+					struct vfsmount, mnt_child);
+		} else
+			mnt = list_entry(mnt->mnt_mounts.next,	/* descend */
+					struct vfsmount, mnt_child);
+	}
+out:
+	spin_unlock(&vfsmount_lock);
+	mntput(rmnt);
+	return err;	/* on -ENOMEM the partial list is left to the caller */
+}
+
+/* Drop the mount reference held by every list entry and free the entries */
+static void vzdq_aquot_releasemntlist(struct ve_struct *ve,
+		struct list_head *head)
+{
+	struct vzdq_aquot_de *cur, *nxt;
+
+	list_for_each_entry_safe(cur, nxt, head, list) {
+		list_del(&cur->list);
+		mntput(cur->mnt);
+		kfree(cur);
+	}
+}
+
+/* List ".", ".." and one QID directory per mount that has a quota master */
+static int vzdq_aquotd_readdir(struct file *file, void *data, filldir_t filler)
+{
+	struct ve_struct *ve, *old_ve;
+	struct list_head mntlist;
+	struct vzdq_aquot_de *de;
+	struct super_block *sb;
+	struct vz_quota_master *qmblk;
+	loff_t i, n;
+	char buf[24];
+	int l, err;
+
+	i = 0;			/* position of the entry being considered */
+	n = file->f_pos;	/* first position not yet returned */
+	ve = file->f_dentry->d_sb->s_type->owner_env;
+	old_ve = set_exec_env(ve);
+	INIT_LIST_HEAD(&mntlist);
+#ifdef CONFIG_VE
+	/*
+	 * The only reason of disabling readdir for the host system is that
+	 * this readdir can be slow and CPU consuming with large number of VPSs
+	 * (or just mount points).
+	 */
+	err = ve_is_super(ve);
+#else
+	err = 0;
+#endif
+	if (!err) {
+		err = vzdq_aquot_buildmntlist(ve, &mntlist);
+		if (err)
+			goto out_err;
+	}
+
+	if (i >= n) {
+		if ((*filler)(data, ".", 1, i,
+					file->f_dentry->d_inode->i_ino, DT_DIR))
+			goto out_fill;
+	}
+	i++;
+
+	if (i >= n) {
+		if ((*filler)(data, "..", 2, i,
+					parent_ino(file->f_dentry), DT_DIR))
+			goto out_fill;
+	}
+	i++;
+
+	list_for_each_entry (de, &mntlist, list) {
+		sb = de->mnt->mnt_sb;
+		if (get_device_perms_ve(S_IFBLK, sb->s_dev, FMODE_QUOTACTL))
+			continue;
+
+		qmblk = vzquota_find_qmblk(sb);
+		if (qmblk == NULL || qmblk == VZ_QUOTA_BAD)
+			continue;
+
+		qmblk_put(qmblk);
+		if (i++ < n)	/* handed out by an earlier call already */
+			continue;
+		l = sprintf(buf, "%08x", new_encode_dev(sb->s_dev));
+		if ((*filler)(data, buf, l, i - 1,
+				vzdq_aquot_getino(sb->s_dev), DT_DIR)) {
+			i--;	/* not stored: offer it again next time */
+			break;
+		}
+	}
+
+out_fill:
+	err = 0;	/* a full buffer is not an error */
+	file->f_pos = i;
+out_err:
+	vzdq_aquot_releasemntlist(ve, &mntlist);
+	(void)set_exec_env(old_ve);
+	return err;
+}
+
+static int vzdq_aquotd_looktest(struct inode *inode, void *data)
+{
+	return inode->i_op == &vzdq_aquotq_inode_operations &&	/* QID dir */
+	       vzdq_aquot_getidev(inode) == (dev_t)(unsigned long)data;
+}
+
+static int vzdq_aquotd_lookset(struct inode *inode, void *data)
+{
+	dev_t dev;
+
+	dev = (dev_t)(unsigned long)data;	/* dev packed in cookie */
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_ino = vzdq_aquot_getino(dev);
+	inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;	/* dr-x------ */
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+	inode->i_nlink = 2;
+	inode->i_op = &vzdq_aquotq_inode_operations;
+	inode->i_fop = &vzdq_aquotq_file_operations;
+	vzdq_aquot_setidev(inode, dev);	/* read back via getidev() */
+	return 0;
+}
+
+/* Look up a QID directory; its name is new_encode_dev(dev) in hex */
+static struct dentry *vzdq_aquotd_lookup(struct inode *dir,
+		struct dentry *dentry, struct nameidata *nd)
+{
+	struct ve_struct *ve, *old_ve;
+	const unsigned char *s;
+	int l;
+	dev_t dev;
+	struct inode *inode;
+
+	ve = dir->i_sb->s_type->owner_env;
+	old_ve = set_exec_env(ve);
+#ifdef CONFIG_VE
+	/*
+	 * Lookup is much lighter than readdir, so it can be allowed for the
+	 * host system.  But it would be strange to be able to do lookup only
+	 * without readdir...
+	 */
+	if (ve_is_super(ve))
+		goto out;
+#endif
+
+	dev = 0;
+	l = dentry->d_name.len;
+	if (l <= 0)
+		goto out;
+	for (s = dentry->d_name.name; l > 0; s++, l--) {
+		if (!isxdigit(*s))
+			goto out;
+		if (dev & ~((dev_t)~0 >> 4))
+			goto out;	/* next digit would overflow */
+		dev <<= 4;
+		if (isdigit(*s))
+			dev += *s - '0';
+		else if (islower(*s))
+			dev += *s - 'a' + 10;
+		else
+			dev += *s - 'A' + 10;
+	}
+	dev = new_decode_dev(dev);
+
+	if (get_device_perms_ve(S_IFBLK, dev, FMODE_QUOTACTL))
+		goto out;
+
+	inode = iget5_locked(dir->i_sb, vzdq_aquot_getino(dev),
+			vzdq_aquotd_looktest, vzdq_aquotd_lookset,
+			(void *)(unsigned long)dev);
+	if (inode == NULL)
+		goto out;
+	if (inode->i_state & I_NEW)	/* a cached inode is already unlocked */
+		unlock_new_inode(inode);
+	d_add(dentry, inode);
+	(void)set_exec_env(old_ve);
+	return NULL;
+
+out:
+	(void)set_exec_env(old_ve);
+	return ERR_PTR(-ENOENT);
+}
+
+static struct file_operations vzdq_aquotd_file_operations = {
+	.read		= &generic_read_dir,
+	.readdir	= &vzdq_aquotd_readdir,
+};
+
+static struct inode_operations vzdq_aquotd_inode_operations = {
+	.lookup		= &vzdq_aquotd_lookup,
+};
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Initialization and deinitialization
+ *
+ * --------------------------------------------------------------------- */
+
+/*
+ * FIXME: creation of proc entries here is unsafe with respect to module
+ * unloading.
+ */
+void vzaquota_init(void)
+{
+	struct proc_dir_entry *de;
+
+	de = create_proc_glob_entry("vz/vzaquota",
+			S_IFDIR | S_IRUSR | S_IXUSR, NULL);
+	if (de != NULL) {	/* hook up our lookup/readdir handlers */
+		de->proc_iops = &vzdq_aquotd_inode_operations;
+		de->proc_fops = &vzdq_aquotd_file_operations;
+	} else
+		printk("VZDQ: vz/vzaquota creation failed\n");
+#if defined(CONFIG_SYSCTL)
+	de = create_proc_glob_entry("sys/fs/quota",
+			S_IFDIR | S_IRUSR | S_IXUSR, NULL);
+	if (de == NULL)		/* failure is only reported */
+		printk("VZDQ: sys/fs/quota creation failed\n");
+#endif
+}
+
+void vzaquota_fini(void)
+{
+	remove_proc_entry("vz/vzaquota", NULL);	/* sys/fs/quota is kept */
+}
diff -upr linux-2.6.16.46-0.12.orig/fs/vzdq_mgmt.c linux-2.6.16.46-0.12-027test011/fs/vzdq_mgmt.c
--- linux-2.6.16.46-0.12.orig/fs/vzdq_mgmt.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/vzdq_mgmt.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,758 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ *
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <asm/semaphore.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/quota.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+
+
+/* ----------------------------------------------------------------------
+ * Switching quota on.
+ * --------------------------------------------------------------------- */
+
+/*
+ * Validate limits copied from user space.
+ * Returns 0 if they are sane, -EINVAL otherwise.
+ */
+int vzquota_check_sane_limits(struct dq_stat *qstat)
+{
+	int blocks_bad, inodes_bad;
+
+	/*
+	 * A soft limit may equal, but never exceed, its hard limit.
+	 * Both the block and the inode pair have to obey this.
+	 */
+	blocks_bad = qstat->bsoftlimit > qstat->bhardlimit;
+	inodes_bad = qstat->isoftlimit > qstat->ihardlimit;
+
+	if (blocks_bad || inodes_bad)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Validate usage values copied from user space.
+ * Returns 0 if they are sane, -EINVAL otherwise.
+ */
+int vzquota_check_sane_values(struct dq_stat *qstat)
+{
+	/*
+	 * An expiration time only makes sense once usage has reached the
+	 * soft limit; while usage is below it the timer has to be zero.
+	 * This holds for blocks and inodes alike.
+	 */
+	if (qstat->btime != 0 && qstat->bcurrent < qstat->bsoftlimit)
+		return -EINVAL;
+
+	if (qstat->itime != 0 && qstat->icurrent < qstat->isoftlimit)
+		return -EINVAL;
+
+	/* the limits themselves must be consistent as well */
+	return vzquota_check_sane_limits(qstat);
+}
+
+/*
+ * create new quota master block
+ * this function should:
+ *  - copy limits and usage parameters from user buffer;
+ *  - allocate, initialize quota block and insert it to hash;
+ */
+static int vzquota_create(unsigned int quota_id,
+		struct vz_quota_stat __user *u_qstat, int compat)
+{
+	int err;
+	struct vz_quota_stat qstat;
+	struct vz_quota_master *qmblk;
+
+	down(&vz_quota_sem);
+
+	err = -EFAULT;
+	if (!compat) {
+		if (copy_from_user(&qstat, u_qstat, sizeof(qstat)))
+			goto out;
+	} else {	/* NOTE(review): without CONFIG_COMPAT qstat stays unset */
+#ifdef CONFIG_COMPAT
+		struct compat_vz_quota_stat cqstat;
+		if (copy_from_user(&cqstat, u_qstat, sizeof(cqstat)))
+			goto out;
+		compat_dqstat2dqstat(&cqstat.dq_stat, &qstat.dq_stat);
+		compat_dqinfo2dqinfo(&cqstat.dq_info, &qstat.dq_info);
+#endif
+	}
+
+	err = -EINVAL;
+	if (quota_id == 0)	/* id 0 is rejected */
+		goto out;
+
+	if (vzquota_check_sane_values(&qstat.dq_stat))
+		goto out;
+	err = 0;
+	qmblk = vzquota_alloc_master(quota_id, &qstat);
+
+	if (IS_ERR(qmblk)) /* ENOMEM or EEXIST */
+		err = PTR_ERR(qmblk);
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+/**
+ * vzquota_on - turn quota on
+ *
+ * This function should:
+ *  - find and get refcnt of directory entry for quota root and corresponding
+ *    mountpoint;
+ *  - find corresponding quota block and mark it with given path;
+ *  - check quota tree;
+ *  - initialize quota for the tree root.
+ */
+static int vzquota_on(unsigned int quota_id, const char __user *quota_root)
+{
+	int err;
+	struct nameidata nd;
+	struct vz_quota_master *qmblk;
+	struct super_block *dqsb;
+
+	dqsb = NULL;	/* set only once vzquota_get_super() succeeded */
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EBUSY;
+	if (qmblk->dq_state != VZDQ_STARTING)
+		goto out;
+
+	err = user_path_walk(quota_root, &nd);
+	if (err)
+		goto out;
+	/* init path must be a directory */
+	err = -ENOTDIR;
+	if (!S_ISDIR(nd.dentry->d_inode->i_mode))
+		goto out_path;
+
+	qmblk->dq_root_dentry = nd.dentry;
+	qmblk->dq_root_mnt = nd.mnt;
+	qmblk->dq_sb = nd.dentry->d_inode->i_sb;
+	err = vzquota_get_super(qmblk->dq_sb);
+	if (err)
+		goto out_super;
+
+	/*
+	 * Serialization with quota initialization and operations is performed
+	 * through generation check: generation is memorized before qmblk is
+	 * found and compared under inode_qmblk_lock with assignment.
+	 *
+	 * Note that the dentry tree is shrunk only for high-level logical
+	 * serialization, purely as a courtesy to the user: to have consistent
+	 * quota statistics, files should be closed etc. on quota on.
+	 */
+	err = vzquota_on_qmblk(qmblk->dq_sb, qmblk->dq_root_dentry->d_inode,
+			qmblk);
+	if (err)
+		goto out_init;
+	qmblk->dq_state = VZDQ_WORKING;
+
+	up(&vz_quota_sem);
+	return 0;	/* nd's dentry and mnt stay referenced by qmblk */
+
+out_init:
+	dqsb = qmblk->dq_sb;	/* get_super succeeded: undo it below */
+out_super:
+	/* clear for qmblk_put/quota_free_master */
+	qmblk->dq_sb = NULL;
+	qmblk->dq_root_dentry = NULL;
+	qmblk->dq_root_mnt = NULL;
+out_path:
+	path_release(&nd);
+out:
+	if (dqsb)
+		vzquota_put_super(dqsb);
+	up(&vz_quota_sem);
+	return err;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Switching quota off.
+ * --------------------------------------------------------------------- */
+
+/*
+ * destroy quota block by ID; -ENOENT if unknown, -EBUSY while quota is on
+ */
+static int vzquota_destroy(unsigned int quota_id)
+{
+	int err;
+	struct vz_quota_master *qmblk;
+	struct dentry *dentry;
+	struct vfsmount *mnt;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EBUSY;
+	if (qmblk->dq_state == VZDQ_WORKING)
+		goto out; /* quota_off first */
+
+	list_del_init(&qmblk->dq_hash);
+	dentry = qmblk->dq_root_dentry;
+	qmblk->dq_root_dentry = NULL;
+	mnt = qmblk->dq_root_mnt;
+	qmblk->dq_root_mnt = NULL;
+
+	if (qmblk->dq_sb)
+		vzquota_put_super(qmblk->dq_sb);
+	up(&vz_quota_sem);	/* the puts below run outside the semaphore */
+
+	qmblk_put(qmblk);
+	dput(dentry);	/* path refs stored by vzquota_on() */
+	mntput(mnt);
+	return 0;
+
+out:
+	up(&vz_quota_sem);
+	return err;
+}
+
+/**
+ * vzquota_off - turn quota off
+ */
+
+static int __vzquota_sync_list(struct list_head *lh,
+		struct vz_quota_master *qmblk,
+		enum writeback_sync_modes sync_mode)
+{
+	struct writeback_control wbc;
+	LIST_HEAD(list);	/* links already handled */
+	struct vz_quota_ilink *qlnk;
+	struct inode *inode;
+	int err, ret;
+
+	memset(&wbc, 0, sizeof(wbc));
+	wbc.sync_mode = sync_mode;
+	/* called with the inode_qmblk lock held; it is dropped while syncing */
+	err = ret = 0;
+	while (!list_empty(lh)) {
+		if (need_resched()) {
+			inode_qmblk_unlock(qmblk->dq_sb);
+			schedule();
+			inode_qmblk_lock(qmblk->dq_sb);
+			continue;
+		}
+
+		qlnk = list_first_entry(lh, struct vz_quota_ilink, list);
+		list_move(&qlnk->list, &list);	/* park on a private list */
+
+		inode = igrab(QLNK_INODE(qlnk));
+		if (!inode)
+			continue;	/* could not grab it: skip */
+
+		inode_qmblk_unlock(qmblk->dq_sb);
+
+		wbc.nr_to_write = LONG_MAX;
+		ret = sync_inode(inode, &wbc);
+		if (ret)
+			err = ret;	/* remember the last failure */
+		iput(inode);
+
+		inode_qmblk_lock(qmblk->dq_sb);
+	}
+
+	list_splice(&list, lh);	/* give all links back to @lh */
+	return err;
+}
+
+static int vzquota_sync_list(struct list_head *lh,
+		struct vz_quota_master *qmblk)
+{
+	(void)__vzquota_sync_list(lh, qmblk, WB_SYNC_NONE);	/* 1st pass */
+	return __vzquota_sync_list(lh, qmblk, WB_SYNC_ALL);	/* 2nd pass */
+}
+
+static int vzquota_sync_inodes(struct vz_quota_master *qmblk)
+{
+	int err;
+	LIST_HEAD(qlnk_list);
+
+	list_splice_init(&qmblk->dq_ilink_list, &qlnk_list);	/* detach */
+	err = vzquota_sync_list(&qlnk_list, qmblk);
+	if (!err && !list_empty(&qmblk->dq_ilink_list))
+		err = -EBUSY;	/* links were added meanwhile */
+	list_splice(&qlnk_list, &qmblk->dq_ilink_list);
+
+	return err;
+}
+
+static int vzquota_off(unsigned int quota_id)
+{
+	int err, ret;
+	struct vz_quota_master *qmblk;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EALREADY;
+	if (qmblk->dq_state != VZDQ_WORKING)
+		goto out;
+
+	inode_qmblk_lock(qmblk->dq_sb); /* protects dq_ilink_list also */
+	ret = vzquota_sync_inodes(qmblk);
+	inode_qmblk_unlock(qmblk->dq_sb);
+
+	err = vzquota_off_qmblk(qmblk->dq_sb, qmblk);
+	if (err)
+		goto out;	/* the sync result in ret is dropped */
+
+	err = ret;	/* quota is off; report how the sync went */
+	/* vzquota_destroy will free resources */
+	qmblk->dq_state = VZDQ_STOPING;
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Other VZQUOTA ioctl's.
+ * --------------------------------------------------------------------- */
+
+/*
+ * vzquota_update_limit - install new block and inode limits
+ * @_qstat: statistics to update in place
+ * @qstat: requested limits and optional grace period end times
+ *
+ * Nothing is changed if vzquota_check_sane_limits() rejects @qstat.
+ * vzquota_setlimit() calls this under the master block data write lock.
+ * The ugid hash table is not allocated here; that happens on demand.
+ * The check for "usage at or above the soft limit" is made against the
+ * newly installed soft limit, not the previous one.
+ *
+ * Returns 0 on success or -EINVAL for limits that are not sane.
+ */
+int vzquota_update_limit(struct dq_stat *_qstat,
+		struct dq_stat *qstat)
+{
+	if (vzquota_check_sane_limits(qstat))
+		return -EINVAL;
+
+	/* block limits */
+	_qstat->bhardlimit = qstat->bhardlimit;
+	_qstat->bsoftlimit = qstat->bsoftlimit;
+
+	/*
+	 * Grace period end.  When usage is at or above the new soft limit the
+	 * administrator may supply the moment at which the grace period ends
+	 * (a zero time means "not supplied").  This is useful when the soft
+	 * limit is lowered below the current usage: without an explicit end,
+	 * the grace period starts at the first write operation.
+	 *
+	 * This races with user level: the limit may already have been
+	 * exceeded before the change, in which case the end computed by the
+	 * kernel is overridden, and user level cannot check and set
+	 * atomically.  The race is harmless: under normal circumstances the
+	 * two values differ by no more than the time between the check and
+	 * set calls, i.e. about the quota timer resolution of 1 sec.
+	 */
+	if (qstat->btime != (time_t)0 &&
+	    _qstat->bcurrent >= _qstat->bsoftlimit)
+		_qstat->btime = qstat->btime;
+
+	/* inode limits, same grace period rule as for blocks */
+	_qstat->ihardlimit = qstat->ihardlimit;
+	_qstat->isoftlimit = qstat->isoftlimit;
+	if (qstat->itime != (time_t)0 &&
+	    _qstat->icurrent >= _qstat->isoftlimit)
+		_qstat->itime = qstat->itime;
+
+	return 0;
+}
+
+/*
+ * vzquota_setlimit - copy new limits from user level (native or compat
+ * layout) and install them, together with dq_info, in the quota master
+ * block @quota_id.  Returns 0, -ENOENT, -EFAULT or -EINVAL.
+ */
+static int vzquota_setlimit(unsigned int quota_id,
+		struct vz_quota_stat __user *u_qstat, int compat)
+{
+	int err;
+	struct vz_quota_stat qstat;
+	struct vz_quota_master *qmblk;
+
+	down(&vz_quota_sem); /* for hash list protection */
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (!compat) {
+		if (copy_from_user(&qstat, u_qstat, sizeof(qstat)))
+			goto out;
+	} else {
+#ifdef CONFIG_COMPAT
+		struct compat_vz_quota_stat cqstat;
+		if (copy_from_user(&cqstat, u_qstat, sizeof(cqstat)))
+			goto out;
+		compat_dqstat2dqstat(&cqstat.dq_stat, &qstat.dq_stat);
+		compat_dqinfo2dqinfo(&cqstat.dq_info, &qstat.dq_info);
+#else
+		goto out; /* no compat support: never use uninitialized qstat */
+#endif
+	}
+
+	qmblk_data_write_lock(qmblk);
+	err = vzquota_update_limit(&qmblk->dq_stat, &qstat.dq_stat);
+	if (err == 0)
+		qmblk->dq_info = qstat.dq_info;
+	qmblk_data_write_unlock(qmblk);
+
+out:
+	up(&vz_quota_sem);
+	return err;
+}
+
+/*
+ * vzquota_getstat - copy dq_stat and dq_info of master block @quota_id to
+ * user level (native or compat layout).  Returns 0, -ENOENT or -EFAULT.
+ */
+static int vzquota_getstat(unsigned int quota_id,
+		struct vz_quota_stat __user *u_qstat, int compat)
+{
+	int err;
+	struct vz_quota_stat qstat;
+	struct vz_quota_master *qmblk;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+	/* NOTE(review): qstat is not zeroed; confirm it has no padding to leak */
+	qmblk_data_read_lock(qmblk);
+	/* copy whole buffer under lock */
+	memcpy(&qstat.dq_stat, &qmblk->dq_stat, sizeof(qstat.dq_stat));
+	memcpy(&qstat.dq_info, &qmblk->dq_info, sizeof(qstat.dq_info));
+	qmblk_data_read_unlock(qmblk);
+	/* the user copy below works on the local snapshot, outside the lock */
+	if (!compat)
+		err = copy_to_user(u_qstat, &qstat, sizeof(qstat));
+	else {
+#ifdef CONFIG_COMPAT
+		struct compat_vz_quota_stat cqstat;
+		dqstat2compat_dqstat(&qstat.dq_stat, &cqstat.dq_stat);
+		dqinfo2compat_dqinfo(&qstat.dq_info, &cqstat.dq_info);
+		err = copy_to_user(u_qstat, &cqstat, sizeof(cqstat));
+#endif
+	}
+	if (err)	/* also hit when no branch above assigned err */
+		err = -EFAULT;
+
+out:
+	up(&vz_quota_sem);
+	return err;
+}
+
+/*
+ * do_vzquotactl - dispatch one VZ quota control command
+ * @cmd: VZ_DQ_* command code
+ * @quota_id: id of the quota master block to operate on
+ * @qstat: user buffer handed to the create, setlimit and getstat handlers
+ * @ve_root: user string handed to vzquota_on()
+ * @compat: forwarded to the handlers that take a user buffer
+ *
+ * The caller must have both CAP_SYS_RESOURCE and CAP_SYS_ADMIN; the call
+ * is meant to be allowed only from VE0.
+ *
+ * VZ_DQ_CREATE, VZ_DQ_DESTROY, VZ_DQ_ON, VZ_DQ_OFF, VZ_DQ_SETLIMIT and
+ * VZ_DQ_GETSTAT are each forwarded to the matching vzquota_*() handler.
+ *
+ * Returns the handler's result, -EPERM when a capability is missing and
+ * -EINVAL for an unknown @cmd.
+ */
+long do_vzquotactl(int cmd, unsigned int quota_id,
+		struct vz_quota_stat __user *qstat, const char __user *ve_root,
+		int compat)
+{
+	/* access allowed only from root of VE0 */
+	if (!(capable(CAP_SYS_RESOURCE) && capable(CAP_SYS_ADMIN)))
+		return -EPERM;
+
+	switch (cmd) {
+	case VZ_DQ_CREATE:
+		return vzquota_create(quota_id, qstat, compat);
+	case VZ_DQ_DESTROY:
+		return vzquota_destroy(quota_id);
+	case VZ_DQ_ON:
+		return vzquota_on(quota_id, ve_root);
+	case VZ_DQ_OFF:
+		return vzquota_off(quota_id);
+	case VZ_DQ_SETLIMIT:
+		return vzquota_setlimit(quota_id, qstat, compat);
+	case VZ_DQ_GETSTAT:
+		return vzquota_getstat(quota_id, qstat, compat);
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Proc filesystem routines
+ * ---------------------------------------------------------------------*/
+
+#if defined(CONFIG_PROC_FS)
+
+#define QUOTA_UINT_LEN		15
+#define QUOTA_TIME_LEN_FMT_UINT	"%11u"
+#define QUOTA_NUM_LEN_FMT_UINT	"%15u"
+#define QUOTA_NUM_LEN_FMT_ULL	"%15Lu"
+#define QUOTA_TIME_LEN_FMT_STR	"%11s"
+#define QUOTA_NUM_LEN_FMT_STR	"%15s"
+#define QUOTA_PROC_MAX_LINE_LEN 2048
+
+/*
+ * Emit the column titles of the vz/vzquota proc file into @buffer:
+ * "qid: path" left-justified, then the numeric and time column names.
+ * Returns the number of characters stored (sprintf() result).
+ */
+static int print_proc_header(char * buffer)
+{
+	return sprintf(buffer,
+			"%-11s"
+			QUOTA_NUM_LEN_FMT_STR QUOTA_NUM_LEN_FMT_STR
+			QUOTA_NUM_LEN_FMT_STR
+			QUOTA_TIME_LEN_FMT_STR QUOTA_TIME_LEN_FMT_STR
+			"\n",
+			"qid: path", "usage", "softlimit", "hardlimit",
+			"time", "expire");
+}
+
+/*
+ * Print "<quota id>: <path or state marker>\n" for @qp; returns its length.
+ */
+static int print_proc_master_id(char * buffer, char * path_buf,
+		struct vz_quota_master * qp)
+{
+	char *path;
+	int over;
+
+	path = NULL;
+	switch (qp->dq_state) {
+		case VZDQ_WORKING:
+			if (!path_buf) {
+				path = "";
+				break;
+			}
+			path = d_path(qp->dq_root_dentry,
+				      qp->dq_root_mnt, path_buf, PAGE_SIZE);
+			if (IS_ERR(path)) {
+				path = "";
+				break;
+			}
+			/* keep only the tail of an over-long path, led by "..." */
+			over = strlen(path) -
+				(QUOTA_PROC_MAX_LINE_LEN - 3 - 3 -
+					QUOTA_UINT_LEN);
+			if (over > 0) {
+				path += over - 3;
+				path[0] = path[1] = path[2] = '.';
+			}
+			break;
+		case VZDQ_STARTING:
+			path = "-- started --";
+			break;
+		case VZDQ_STOPING:
+			path = "-- stopped --";
+			break;
+	}
+
+	return sprintf(buffer, "%u: %s\n", qp->dq_id, path);
+}
+
+/*
+ * Format the statistics of one quota record as two lines: "1k-blocks"
+ * (byte counters shifted right by 10) and "inodes", each with usage,
+ * soft limit, hard limit and the time and expire values.
+ * Returns the number of characters stored (sprintf() result).
+ */
+static int print_proc_stat(char * buffer, struct dq_stat *qs,
+		struct dq_info *qi)
+{
+	unsigned long long used_kb, soft_kb, hard_kb;
+
+	used_kb = (unsigned long long)qs->bcurrent >> 10;
+	soft_kb = (unsigned long long)qs->bsoftlimit >> 10;
+	hard_kb = (unsigned long long)qs->bhardlimit >> 10;
+
+	return sprintf(buffer,
+		       "%11s"
+		       QUOTA_NUM_LEN_FMT_ULL
+		       QUOTA_NUM_LEN_FMT_ULL
+		       QUOTA_NUM_LEN_FMT_ULL
+		       QUOTA_TIME_LEN_FMT_UINT QUOTA_TIME_LEN_FMT_UINT
+		       "\n"
+		       "%11s"
+		       QUOTA_NUM_LEN_FMT_UINT
+		       QUOTA_NUM_LEN_FMT_UINT
+		       QUOTA_NUM_LEN_FMT_UINT
+		       QUOTA_TIME_LEN_FMT_UINT QUOTA_TIME_LEN_FMT_UINT
+		       "\n",
+		       "1k-blocks", used_kb, soft_kb, hard_kb,
+		       (unsigned int)qs->btime, (unsigned int)qi->bexpire,
+		       "inodes",
+		       qs->icurrent, qs->isoftlimit, qs->ihardlimit,
+		       (unsigned int)qs->itime, (unsigned int)qi->iexpire);
+}
+
+
+/*
+ * read_proc callback of vz/vzquota: header line plus one record per master.
+ */
+static int vzquota_read_proc(char *page, char **start, off_t off, int count,
+			   int *eof, void *data)
+{
+	int len, i;
+	off_t printed = 0;
+	char *p = page;
+	struct vz_quota_master *qp;
+	struct vz_quota_ilink *ql2;
+	struct list_head *listp;
+	char *path_buf;
+
+	path_buf = (char*)__get_free_page(GFP_KERNEL);
+	if (path_buf == NULL)
+		return -ENOMEM;
+	/* header is always formatted, but kept only if @off lies inside it */
+	len = print_proc_header(p);
+	printed += len;
+	if (off < printed) /* keep header in output */ {
+		*start = p + off;
+		p += len;
+	}
+
+	down(&vz_quota_sem);
+
+	/* traverse master hash table for all records */
+	for (i = 0; i < vzquota_hash_size; i++) {
+		list_for_each(listp, &vzquota_hash_table[i]) {
+			qp = list_entry(listp,
+					struct vz_quota_master, dq_hash);
+
+			/* Skip other VE's information if not root of VE0 */
+			if ((!capable(CAP_SYS_ADMIN) ||
+			     !capable(CAP_SYS_RESOURCE))) {
+				ql2 = INODE_QLNK(current->fs->root->d_inode);
+				if (ql2 == NULL || qp != ql2->qmblk)
+					continue;
+			}
+			/*
+			 * Now print the next record
+			 */
+			len = 0;
+			/* we print quotaid and path only in VE0 */
+			if (capable(CAP_SYS_ADMIN))
+				len += print_proc_master_id(p+len,path_buf, qp);
+			len += print_proc_stat(p+len, &qp->dq_stat,
+					&qp->dq_info);
+			printed += len;
+			/* skip unnecessary lines */
+			if (printed <= off)
+				continue;
+			p += len;
+			/* provide start offset */
+			if (*start == NULL)
+				*start = p + (off - printed);
+			/* have we printed all requested size? */
+			if (PAGE_SIZE - (p - page) < QUOTA_PROC_MAX_LINE_LEN ||
+			    (p - *start) >= count)
+				goto out;
+		}
+	}
+
+	*eof = 1; /* checked all hash */
+out:
+	up(&vz_quota_sem);
+
+	len = 0;
+	if (*start != NULL) {
+		len = (p - *start);
+		if (len > count)
+			len = count;
+	}
+	/* path_buf is never NULL here: that case returned -ENOMEM above */
+	if (path_buf)
+		free_page((unsigned long) path_buf);
+
+	return len;
+}
+
+/*
+ * Create the vz/vzquota proc file (and the "vz" directory when the first
+ * attempt fails) and attach vzquota_read_proc() to it.
+ * Returns 0 on success or -EBUSY when an entry cannot be created.
+ */
+int vzquota_proc_init(void)
+{
+	struct proc_dir_entry *entry, *dir;
+
+	entry = create_proc_entry_mod("vz/vzquota", S_IFREG|S_IRUSR, NULL,
+			THIS_MODULE);
+	if (entry == NULL) {
+		/* fall back: create the "vz" directory, then the file in it */
+		dir = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+		if (dir == NULL)
+			return -EBUSY;
+		entry = create_proc_entry_mod("vzquota", S_IFREG|S_IRUSR, dir,
+				THIS_MODULE);
+		if (entry == NULL)
+			return -EBUSY;
+	}
+	entry->read_proc = vzquota_read_proc;
+	entry->data = NULL;
+	return 0;
+}
+
+/* Unregister the procfs read callback: removes the vz/vzquota entry. */
+void vzquota_proc_release(void)
+{
+	remove_proc_entry("vz/vzquota", NULL);
+}
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/fs/vzdq_ops.c linux-2.6.16.46-0.12-027test011/fs/vzdq_ops.c
--- linux-2.6.16.46-0.12.orig/fs/vzdq_ops.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/vzdq_ops.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,635 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ *
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <asm/semaphore.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/vzquota.h>
+
+
+/* ----------------------------------------------------------------------
+ * Quota superblock operations - helper functions.
+ * --------------------------------------------------------------------- */
+
+static inline void vzquota_incr_inodes(struct dq_stat *dqstat,
+		unsigned long number)
+{
+	dqstat->icurrent += number;	/* plain add; no limit check here */
+}
+
+static inline void vzquota_incr_space(struct dq_stat *dqstat,
+		__u64 number)
+{
+	dqstat->bcurrent += number;	/* plain add; no limit check here */
+}
+
+/* Drop @number inodes from the usage, never going below zero; the grace
+ * time is cleared once usage is under the soft limit. */
+static inline void vzquota_decr_inodes(struct dq_stat *dqstat,
+		unsigned long number)
+{
+	dqstat->icurrent = dqstat->icurrent > number ?
+				dqstat->icurrent - number : 0;
+	if (dqstat->icurrent < dqstat->isoftlimit)
+		dqstat->itime = (time_t)0;
+}
+
+/* Drop @number bytes from the usage, never going below zero; the grace
+ * time is cleared once usage is under the soft limit. */
+static inline void vzquota_decr_space(struct dq_stat *dqstat,
+		__u64 number)
+{
+	dqstat->bcurrent = dqstat->bcurrent > number ?
+				dqstat->bcurrent - number : 0;
+	if (dqstat->bcurrent < dqstat->bsoftlimit)
+		dqstat->btime = (time_t)0;
+}
+
+/*
+ * Print @fmt (it receives @dq_id as its only argument) unless @flag is
+ * already recorded in @dq_info->flags, then record @flag.
+ * Idea noted here before: a /proc/vzquotamsg interface like /proc/kmsg.
+ */
+static inline void vzquota_warn(struct dq_info *dq_info, int dq_id, int flag,
+		const char *fmt)
+{
+	if (!(dq_info->flags & flag)) {	/* not yet printed for this master */
+		printk(fmt, dq_id);
+		dq_info->flags |= flag;
+	}
+}
+
+/*
+ * ignore_hardlimit - tell whether hard limits may be exceeded
+ *
+ * Intended to allow the superuser of VE0 to override hardlimits.
+ * Currently disabled: it always returns 0.
+ *
+ * Reason: a writepage() operation for a writable mapping of a file with
+ * holes may trigger get_block() with the wrong "current", which opens a
+ * possibility to overcommit hardlimits.
+ *
+ * Note: the parameter is named dqstat but is a struct dq_info.
+ */
+static inline int ignore_hardlimit(struct dq_info *dqstat)
+{
+#if 0
+	return	ve_is_super(get_exec_env()) &&
+		capable(CAP_SYS_RESOURCE) &&
+		(dqstat->options & VZ_QUOTA_OPT_RSQUASH);
+#else
+	return 0;
+#endif
+}
+
+static int vzquota_check_inodes(struct dq_info *dq_info,
+		struct dq_stat *dqstat,
+		unsigned long number, int dq_id)
+{
+	/* QUOTA_OK if @number more inodes may be charged, else NO_QUOTA */
+	if (number == 0)
+		return QUOTA_OK;
+	if (dqstat->icurrent + number > dqstat->ihardlimit &&
+	    !ignore_hardlimit(dq_info)) {
+		vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
+			   "VZ QUOTA: file hardlimit reached for id=%d\n");
+		return NO_QUOTA;
+	}
+	if (dqstat->icurrent + number <= dqstat->isoftlimit)
+		return QUOTA_OK;
+	/* soft limit: start the grace period, or refuse once it is over */
+	if (dqstat->itime == (time_t)0) {
+		vzquota_warn(dq_info, dq_id, 0,
+			"VZ QUOTA: file softlimit exceeded "
+			"for id=%d\n");
+		dqstat->itime = CURRENT_TIME_SECONDS + dq_info->iexpire;
+		return QUOTA_OK;
+	}
+	if (CURRENT_TIME_SECONDS >= dqstat->itime &&
+	    !ignore_hardlimit(dq_info)) {
+		vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
+			"VZ QUOTA: file softlimit expired "
+			"for id=%d\n");
+		return NO_QUOTA;
+	}
+	return QUOTA_OK;
+}
+
+static int vzquota_check_space(struct dq_info *dq_info,
+		struct dq_stat *dqstat,
+		__u64 number, int dq_id, char prealloc)
+{
+	/* QUOTA_OK if @number more bytes may be charged, else NO_QUOTA */
+	if (number == 0 || prealloc == DQUOT_CMD_FORCE)
+		return QUOTA_OK;
+
+	/* hard limit: refuse (the warning is skipped for preallocation) */
+	if (dqstat->bcurrent + number > dqstat->bhardlimit &&
+	    !ignore_hardlimit(dq_info)) {
+		if (!prealloc)
+			vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
+				"VZ QUOTA: disk hardlimit reached "
+				"for id=%d\n");
+		return NO_QUOTA;
+	}
+
+	if (dqstat->bcurrent + number <= dqstat->bsoftlimit)
+		return QUOTA_OK;
+
+	if (dqstat->btime == (time_t)0) {
+		/*
+		 * Original Linux quota doesn't allow preallocation to
+		 * exceed softlimit so exceeding will be always printed
+		 */
+		if (prealloc)
+			return NO_QUOTA;
+		vzquota_warn(dq_info, dq_id, 0,
+			"VZ QUOTA: disk softlimit exceeded "
+			"for id=%d\n");
+		dqstat->btime = CURRENT_TIME_SECONDS + dq_info->bexpire;
+		return QUOTA_OK;
+	}
+
+	/* grace period already running: refuse once it has expired */
+	if (CURRENT_TIME_SECONDS >= dqstat->btime &&
+	    !ignore_hardlimit(dq_info)) {
+		if (!prealloc)
+			vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
+				"VZ QUOTA: disk quota "
+				"softlimit expired "
+				"for id=%d\n");
+		return NO_QUOTA;
+	}
+
+	return QUOTA_OK;
+}
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+static int vzquota_check_ugid_inodes(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		int type, unsigned long number)
+{
+	struct vz_quota_ugid *ugid = qugid[type];
+	struct dq_stat *stat;
+
+	/* QUOTA_OK when @number inodes fit the @type limits, else NO_QUOTA */
+	if (ugid == NULL)
+		return QUOTA_OK;
+	if (ugid == VZ_QUOTA_UGBAD)
+		return NO_QUOTA;
+
+	/* the quota type is switched off for this master block */
+	if ((type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA)) ||
+	    (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA)))
+		return QUOTA_OK;
+	if (number == 0)
+		return QUOTA_OK;
+
+	stat = &ugid->qugid_stat;
+	/* a limit of 0 is not enforced */
+	if (stat->ihardlimit != 0 &&
+	    stat->icurrent + number > stat->ihardlimit)
+		return NO_QUOTA;
+	if (stat->isoftlimit == 0 ||
+	    stat->icurrent + number <= stat->isoftlimit)
+		return QUOTA_OK;
+
+	/* over the soft limit: open the grace period or enforce its end */
+	if (stat->itime == (time_t)0)
+		stat->itime = CURRENT_TIME_SECONDS +
+				qmblk->dq_ugid_info[type].iexpire;
+	else if (CURRENT_TIME_SECONDS >= stat->itime)
+		return NO_QUOTA;
+	return QUOTA_OK;
+}
+
+static int vzquota_check_ugid_space(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		int type, __u64 number, char prealloc)
+{
+	struct vz_quota_ugid *ugid;
+	struct dq_stat *stat;
+
+	/* QUOTA_OK when @number bytes fit the @type limits, else NO_QUOTA */
+	if (prealloc == DQUOT_CMD_FORCE)
+		return QUOTA_OK;	/* forced: no ugid checks at all */
+
+	ugid = qugid[type];
+	if (ugid == NULL)
+		return QUOTA_OK;
+	if (ugid == VZ_QUOTA_UGBAD)
+		return NO_QUOTA;
+
+	/* the quota type is switched off for this master block */
+	if ((type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA)) ||
+	    (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA)))
+		return QUOTA_OK;
+	if (number == 0)
+		return QUOTA_OK;
+
+	stat = &ugid->qugid_stat;
+	/* a limit of 0 is not enforced */
+	if (stat->bhardlimit != 0 &&
+	    stat->bcurrent + number > stat->bhardlimit)
+		return NO_QUOTA;
+	if (stat->bsoftlimit == 0 ||
+	    stat->bcurrent + number <= stat->bsoftlimit)
+		return QUOTA_OK;
+
+	if (stat->btime != (time_t)0) {
+		/* grace period already running: enforce its end */
+		if (CURRENT_TIME_SECONDS >= stat->btime)
+			return NO_QUOTA;
+		return QUOTA_OK;
+	}
+
+	/*
+	 * Original Linux quota doesn't allow preallocation to exceed
+	 * softlimit so exceeding will be always printed
+	 */
+	if (prealloc)
+		return NO_QUOTA;
+	stat->btime = CURRENT_TIME_SECONDS + qmblk->dq_ugid_info[type].bexpire;
+	return QUOTA_OK;
+}
+#endif
+
+/* ----------------------------------------------------------------------
+ * Quota superblock operations
+ * --------------------------------------------------------------------- */
+
+/*
+ * S_NOQUOTA note.
+ * In the current kernel (2.6.8.1), S_NOQUOTA flag is set only for
+ *  - quota file (absent in our case)
+ *  - after explicit DQUOT_DROP (earlier than clear_inode) in functions like
+ *    filesystem-specific new_inode, before the inode gets outside links.
+ * For the latter case, the only quota operation where care about S_NOQUOTA
+ * might be required is vzquota_drop, but there S_NOQUOTA has already been
+ * checked in DQUOT_DROP().
+ * So, S_NOQUOTA may be ignored for now in the VZDQ code.
+ *
+ * The above note is not entirely correct.
+ * Both for ext2 and ext3 filesystems, DQUOT_FREE_INODE is called from
+ * delete_inode if new_inode fails (for example, because of inode quota
+ * limits), so S_NOQUOTA check is needed in free_inode.
+ * This seems to be the dark corner of the current quota API.
+ */
+
+/*
+ * .initialize operation: forwards @inode to vzquota_inode_init_call().
+ */
+static int vzquota_initialize(struct inode *inode, int type)
+{
+	vzquota_inode_init_call(inode);	/* @type is not used */
+	return 0; /* ignored by caller */
+}
+
+/*
+ * .drop operation: forwards @inode to vzquota_inode_drop_call().
+ */
+static int vzquota_drop(struct inode *inode)
+{
+	vzquota_inode_drop_call(inode);
+	return 0; /* ignored by caller */
+}
+
+/*
+ * Allocate block callback: charge @number bytes to the inode's quota.
+ *
+ * If (prealloc) disk quota exceeding warning is not printed.
+ * With prealloc == DQUOT_CMD_FORCE the limit checks always pass, so a
+ * VZ_QUOTA_UGBAD ugid marker can reach the charging loop and is skipped.
+ *
+ * Return:
+ *	QUOTA_OK == 0 on SUCCESS, NO_QUOTA == 1 if allocation should fail
+ */
+static int vzquota_alloc_space(struct inode *inode,
+			     qsize_t number, int prealloc)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	int ret = QUOTA_OK;
+
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid[MAXQUOTAS];
+#endif
+
+		/* checking first */
+		ret = vzquota_check_space(&qmblk->dq_info, &qmblk->dq_stat,
+				number, qmblk->dq_id, prealloc);
+		if (ret == NO_QUOTA)
+			goto no_quota;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
+			ret = vzquota_check_ugid_space(qmblk, qugid,
+					cnt, number, prealloc);
+			if (ret == NO_QUOTA)
+				goto no_quota;
+		}
+		/* check ok, may increment */
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			if (qugid[cnt] == NULL || qugid[cnt] == VZ_QUOTA_UGBAD)
+				continue;
+			vzquota_incr_space(&qugid[cnt]->qugid_stat, number);
+		}
+#endif
+		vzquota_incr_space(&qmblk->dq_stat, number);
+		vzquota_data_unlock(inode, &data);
+	}
+
+	inode_add_bytes(inode, number);
+	might_sleep();
+	return QUOTA_OK;
+
+no_quota:
+	vzquota_data_unlock(inode, &data);
+	return NO_QUOTA;
+}
+
+/*
+ * Allocate inodes callback: charge @number inodes to the inode's quota.
+ *
+ * Return:
+ *	QUOTA_OK == 0 on SUCCESS
+ *	NO_QUOTA == 1 if allocation should fail
+ */
+static int vzquota_alloc_inode(const struct inode *inode, unsigned long number)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	int ret = QUOTA_OK;
+
+	qmblk = vzquota_inode_data((struct inode *)inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (qmblk != NULL) {	/* NULL: nothing to charge, report QUOTA_OK */
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid *qugid[MAXQUOTAS];
+#endif
+
+		/* checking first */
+		ret = vzquota_check_inodes(&qmblk->dq_info, &qmblk->dq_stat,
+				number, qmblk->dq_id);
+		if (ret == NO_QUOTA)
+			goto no_quota;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
+			ret = vzquota_check_ugid_inodes(qmblk, qugid,
+					cnt, number);
+			if (ret == NO_QUOTA)
+				goto no_quota;
+		}
+		/* check ok, may increment */
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			if (qugid[cnt] == NULL)
+				continue;
+			vzquota_incr_inodes(&qugid[cnt]->qugid_stat, number);
+		}
+#endif
+		vzquota_incr_inodes(&qmblk->dq_stat, number);
+		vzquota_data_unlock((struct inode *)inode, &data);
+	}
+
+	might_sleep();
+	return QUOTA_OK;
+
+no_quota:
+	vzquota_data_unlock((struct inode *)inode, &data);
+	return NO_QUOTA;
+}
+
+/*
+ * Free space callback: uncharge @number bytes from the inode's quota.
+ */
+static int vzquota_free_space(struct inode *inode, qsize_t number)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA; /* isn't checked by the caller */
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid;
+#endif
+		/* master block first, then every usable ugid record */
+		vzquota_decr_space(&qmblk->dq_stat, number);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid = INODE_QLNK(inode)->qugid[cnt];
+			if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
+				continue;
+			vzquota_decr_space(&qugid->qugid_stat, number);
+		}
+#endif
+		vzquota_data_unlock(inode, &data);
+	}
+	inode_sub_bytes(inode, number);	/* done even when qmblk is NULL */
+	might_sleep();
+	return QUOTA_OK;
+}
+
+/*
+ * Free inodes callback: uncharge @number inodes from the inode's quota.
+ */
+static int vzquota_free_inode(const struct inode *inode, unsigned long number)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	qmblk = vzquota_inode_data((struct inode *)inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return NO_QUOTA;
+	if (qmblk != NULL) {
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid;
+#endif
+		/* master block first, then every usable ugid record */
+		vzquota_decr_inodes(&qmblk->dq_stat, number);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid = INODE_QLNK(inode)->qugid[cnt];
+			if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
+				continue;
+			vzquota_decr_inodes(&qugid->qugid_stat, number);
+		}
+#endif
+		vzquota_data_unlock((struct inode *)inode, &data);
+	}
+	might_sleep();
+	return QUOTA_OK;
+}
+
+void vzquota_inode_off(struct inode * inode)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	/* Stop accounting @inode: uncharge it and set S_NOQUOTA.  The call
+	 * is made through virtinfo, it can be an inode not controlled by
+	 * vzquota. */
+	if (inode->i_sb->dq_op != &vz_quota_operations)
+		return;
+
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk == VZ_QUOTA_BAD)
+		return;
+
+	if (qmblk == NULL) {
+		/* Tricky place. If qmblk == NULL, it means that this inode
+		 * is not in area controlled by vzquota (except for rare
+		 * case of already set S_NOQUOTA). But we have to set
+		 * S_NOQUOTA in any case because vzquota can be turned
+		 * on later, when this inode is invalid from viewpoint
+		 * of vzquota.
+		 *
+		 * To be safe, we reacquire vzquota lock.
+		 */
+		inode_qmblk_lock(inode->i_sb);
+		inode->i_flags |= S_NOQUOTA;
+		inode_qmblk_unlock(inode->i_sb);
+		return;
+	} else {
+		loff_t bytes = inode_get_bytes(inode);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		int cnt;
+		struct vz_quota_ugid * qugid;
+#endif
+
+		inode->i_flags |= S_NOQUOTA;
+		/* give back the inode itself and all bytes it accounts for */
+		vzquota_decr_space(&qmblk->dq_stat, bytes);
+		vzquota_decr_inodes(&qmblk->dq_stat, 1);
+#ifdef CONFIG_VZ_QUOTA_UGID
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			qugid = INODE_QLNK(inode)->qugid[cnt];
+			if (qugid == NULL || qugid == VZ_QUOTA_UGBAD)
+				continue;
+			vzquota_decr_space(&qugid->qugid_stat, bytes);
+			vzquota_decr_inodes(&qugid->qugid_stat, 1);
+		}
+#endif
+
+		vzquota_data_unlock(inode, &data);
+
+		vzquota_inode_drop_call(inode);
+	}
+}
+
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+
+/*
+ * quota_transfer helper: 0 if one @size-byte inode fits @qugid[@type], else -1
+ */
+static int vzquota_transfer_check(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid[],
+		unsigned int type, __u64 size)
+{
+	if (vzquota_check_ugid_space(qmblk, qugid, type, size, 0) != QUOTA_OK)
+		return -1;
+	if (vzquota_check_ugid_inodes(qmblk, qugid, type, 1) != QUOTA_OK)
+		return -1;
+	return 0;
+}
+
+int vzquota_transfer_usage(struct inode *inode,
+		int mask,
+		struct vz_quota_ilink *qlnk)
+{
+	struct dq_stat *from, *to;
+	__u64 bytes = inode_get_bytes(inode);
+	int type;
+
+	for (type = 0; type < MAXQUOTAS; type++) {
+		if ((mask & (1 << type)) == 0)
+			continue;
+		/*
+		 * Do not permit chown of a file whose owner has no ugid
+		 * record.  This might happen if the UID/GID limit was
+		 * exceeded (e.g. uglimit set below the number of users).
+		 */
+		if (INODE_QLNK(inode)->qugid[type] == VZ_QUOTA_UGBAD ||
+		    vzquota_transfer_check(qlnk->qmblk, qlnk->qugid,
+					   type, bytes))
+			return -1;
+	}
+	/* all checks passed: move the inode and its bytes, old ugid to new */
+	for (type = 0; type < MAXQUOTAS; type++) {
+		if ((mask & (1 << type)) == 0)
+			continue;
+		from = &INODE_QLNK(inode)->qugid[type]->qugid_stat;
+		to = &qlnk->qugid[type]->qugid_stat;
+		vzquota_decr_space(from, bytes);
+		vzquota_decr_inodes(from, 1);
+		vzquota_incr_space(to, bytes);
+		vzquota_incr_inodes(to, 1);
+	}
+	return 0;
+}
+
+/* Transfer the inode between different user/group quotas (.transfer op). */
+static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
+{
+	if (vzquota_inode_transfer_call(inode, iattr))
+		return NO_QUOTA;
+
+	return QUOTA_OK;
+}
+
+#else /* CONFIG_VZ_QUOTA_UGID */
+
+static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
+{
+	return QUOTA_OK;	/* built without CONFIG_VZ_QUOTA_UGID */
+}
+
+#endif
+
+/*
+ * .rename quota operation.  Called under following semaphores
+ * [not verified  --SAW]: old_d->d_inode->i_sb->s_vfs_rename_sem,
+ * old_d->d_inode->i_sem and new_d->d_inode->i_sem.
+ * Returns NO_QUOTA if vzquota_rename_check() reports non-zero.
+ */
+static int vzquota_rename(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)
+{
+	if (vzquota_rename_check(inode, old_dir, new_dir))
+		return NO_QUOTA;
+	return QUOTA_OK;
+}
+
+/*
+ * Superblock diskquota operations (vzquota_inode_off() tests dq_op for it).
+ */
+struct dquot_operations vz_quota_operations = {
+	.initialize	= vzquota_initialize,
+	.drop		= vzquota_drop,
+	.alloc_space	= vzquota_alloc_space,
+	.alloc_inode	= vzquota_alloc_inode,
+	.free_space	= vzquota_free_space,
+	.free_inode	= vzquota_free_inode,
+	.transfer	= vzquota_transfer,
+	.rename		= vzquota_rename,
+};
diff -upr linux-2.6.16.46-0.12.orig/fs/vzdq_tree.c linux-2.6.16.46-0.12-027test011/fs/vzdq_tree.c
--- linux-2.6.16.46-0.12.orig/fs/vzdq_tree.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/vzdq_tree.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,286 @@
+/*
+ *
+ * Copyright (C) 2005  SWsoft
+ * All rights reserved.
+ *
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo quota tree implementation
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/vzdq_tree.h>
+
+struct quotatree_tree *quotatree_alloc(void)
+{
+	struct quotatree_tree *tree;
+	int lvl;
+
+	/* returns an empty tree, or NULL when kmalloc() fails */
+	tree = kmalloc(sizeof(*tree), GFP_KERNEL);
+	if (tree == NULL)
+		return NULL;
+
+	tree->root = NULL;
+	tree->leaf_num = 0;
+	for (lvl = 0; lvl < QUOTATREE_DEPTH; lvl++) {
+		tree->levels[lvl].freenum = 0;
+		INIT_LIST_HEAD(&tree->levels[lvl].usedlh);
+		INIT_LIST_HEAD(&tree->levels[lvl].freelh);
+	}
+	return tree;
+}
+
+static struct quotatree_node *
+quotatree_follow(struct quotatree_tree *tree, quotaid_t id, int level,
+		struct quotatree_find_state *st)
+{
+	struct quotatree_node *parent = NULL;
+	void **slot = (void **)&tree->root;
+	int depth, index;
+
+	/*
+	 * Descend from the root for at most @level steps, stopping early at
+	 * an empty slot.  @parent is the last node entered (NULL if none).
+	 */
+	for (depth = 0; depth < level && *slot != NULL; depth++) {
+		index = (id >> QUOTATREE_BSHIFT(depth)) & QUOTATREE_BMASK;
+		parent = *slot;
+		slot = parent->blocks + index;
+	}
+	/* optionally report where the walk stopped */
+	if (st != NULL) {
+		st->block = slot;
+		st->level = depth;
+	}
+	return parent;
+}
+
+void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st)
+{
+	/* @st must be non-NULL: it is read back right after the walk */
+	quotatree_follow(tree, id, QUOTATREE_DEPTH, st);
+	if (st->level != QUOTATREE_DEPTH)
+		return NULL;	/* walk stopped early: @id is absent */
+	return *st->block;
+}
+
+void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index)
+{
+	struct quotatree_node *node;
+	unsigned int skip = index;
+	int slot;
+
+	/* returns the @index-th non-NULL leaf in used-list order, or NULL */
+	if (index >= QTREE_LEAFNUM(tree))
+		return NULL;
+
+	list_for_each_entry(node, &QTREE_LEAFLVL(tree)->usedlh, list) {
+		for (slot = 0; slot < QUOTATREE_BSIZE; slot++) {
+			if (node->blocks[slot] == NULL)
+				continue;
+			if (skip == 0)
+				return node->blocks[slot];
+			skip--;
+		}
+	}
+
+	return NULL;
+}
+
+/* returns data leaf (vz_quota_ugid) after _existent_ ugid (@id)
+ * in the tree, or NULL when no further leaf is found... */
+void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id)
+{
+	int off;
+	struct quotatree_node *parent, *p;
+	struct list_head *lh;
+
+	/* get parent refering correct quota tree node of the last level */
+	parent = quotatree_follow(tree, id, QUOTATREE_DEPTH, NULL);
+	if (!parent)
+		return NULL;
+	/* NOTE(review): parent is a leaf-level node only if @id exists */
+	off = (id & QUOTATREE_BMASK) + 1;	/* next ugid */
+	lh = &parent->list;
+	do {
+		p = list_entry(lh, struct quotatree_node, list);
+		for ( ; off < QUOTATREE_BSIZE; off++)
+			if (p->blocks[off])
+				return p->blocks[off];
+		off = 0;	/* later nodes are scanned from their first slot */
+		lh = lh->next;
+	} while (lh != &QTREE_LEAFLVL(tree)->usedlh);
+
+	return NULL;
+}
+
+int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st, void *data)
+{
+	struct quotatree_node *p;
+	int l, index;
+	/* presumably @st was filled by quotatree_find() for @id - confirm */
+	while (st->level < QUOTATREE_DEPTH) {
+		l = st->level;
+		if (!list_empty(&tree->levels[l].freelh)) {
+			p = list_entry(tree->levels[l].freelh.next,
+					struct quotatree_node, list);
+			list_del(&p->list);
+		} else {
+			p = kmalloc(sizeof(struct quotatree_node), GFP_NOFS | __GFP_NOFAIL);
+			if (p == NULL)	/* kept although __GFP_NOFAIL is set */
+				return -ENOMEM;
+			/* save block number in the l-level
+			 * it uses for quota file generation */
+			p->num = tree->levels[l].freenum++;
+		}
+		list_add(&p->list, &tree->levels[l].usedlh);
+		memset(p->blocks, 0, sizeof(p->blocks));
+		*st->block = p;
+
+		index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK;
+		st->block = p->blocks + index;
+		st->level++;
+	}
+	tree->leaf_num++;
+	*st->block = data;	/* st->block now addresses the leaf slot of @id */
+
+	return 0;
+}
+
+static struct quotatree_node *
+quotatree_remove_ptr(struct quotatree_tree *tree, quotaid_t id,
+		int level)
+{
+	struct quotatree_node *parent;
+	struct quotatree_find_state st;
+	/* clear the slot reached for @id; a leaf slot also drops leaf_num */
+	parent = quotatree_follow(tree, id, level, &st);
+	if (st.level == QUOTATREE_DEPTH)
+		tree->leaf_num--;
+	*st.block = NULL;
+	return parent;	/* node that held the cleared slot (NULL for the root) */
+}
+
+void quotatree_remove(struct quotatree_tree *tree, quotaid_t id)
+{
+	struct quotatree_node *node;
+	int level = QUOTATREE_DEPTH, slot;
+
+	node = quotatree_remove_ptr(tree, id, level);
+	while (--level >= QUOTATREE_CDEPTH) {	/* recycle emptied nodes */
+		for (slot = 0; slot < QUOTATREE_BSIZE; slot++)
+			if (node->blocks[slot] != NULL)
+				return;
+		list_move(&node->list, &tree->levels[level].freelh);
+		node = quotatree_remove_ptr(tree, id, level);
+	}
+}
+
+#if 0
+static void quotatree_walk(struct quotatree_tree *tree,
+		struct quotatree_node *node_start,
+		quotaid_t id_start,
+		int level_start, int level_end,
+		int (*callback)(struct quotatree_tree *,
+				quotaid_t id,
+				int level,
+				void *ptr,
+				void *data),
+		void *data)
+{
+	struct quotatree_node *p;
+	int l, shift, index;
+	quotaid_t id;
+	struct quotatree_find_state st;
+
+	p = node_start;
+	l = level_start;
+	shift = (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
+	id = id_start;
+	index = 0;
+
+	/*
+	 * Invariants:
+	 * shift == (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
+	 * id & ((1 << shift) - 1) == 0
+	 * p is l-level node corresponding to id
+	 */
+	do {
+		if (!p)
+			break;
+
+		if (l < level_end) {
+			for (; index < QUOTATREE_BSIZE; index++)
+				if (p->blocks[index] != NULL)
+					break;
+			if (index < QUOTATREE_BSIZE) {
+				/* descend */
+				p = p->blocks[index];
+				l++;
+				shift -= QUOTAID_BBITS;
+				id += (quotaid_t)index << shift;
+				index = 0;
+				continue;
+			}
+		}
+
+		if ((*callback)(tree, id, l, p, data))
+			break;
+
+		/* ascend and to the next node */
+		p = quotatree_follow(tree, id, l, &st);
+
+		index = ((id >> shift) & QUOTATREE_BMASK) + 1;
+		l--;
+		shift += QUOTAID_BBITS;
+		id &= ~(((quotaid_t)1 << shift) - 1);
+	} while (l >= level_start);
+}
+#endif
+
+static void free_list(struct list_head *node_list)
+{
+	struct quotatree_node *node;	/* head entry being released */
+	while (!list_empty(node_list)) {
+		node = list_entry(node_list->next, struct quotatree_node, list);
+		list_del(&node->list);
+		kfree(node);
+	}
+}
+
+static inline void quotatree_free_nodes(struct quotatree_tree *tree)
+{
+	int lvl;
+
+	for (lvl = 0; lvl < QUOTATREE_DEPTH; lvl++) {	/* both lists per level */
+		free_list(&tree->levels[lvl].usedlh);
+		free_list(&tree->levels[lvl].freelh);
+	}
+}
+
+static void quotatree_free_leafs(struct quotatree_tree *tree,
+		void (*dtor)(void *))
+{
+	struct quotatree_node *node;
+	int slot;
+
+	/* call @dtor on every non-NULL slot of the used leaf-level nodes;
+	 * the slots themselves are not cleared */
+	list_for_each_entry(node, &QTREE_LEAFLVL(tree)->usedlh, list) {
+		for (slot = 0; slot < QUOTATREE_BSIZE; slot++) {
+			if (node->blocks[slot] != NULL)
+				dtor(node->blocks[slot]);
+		}
+	}
+}
+
+void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *))
+{
+	quotatree_free_leafs(tree, dtor);	/* leaf payloads via @dtor first */
+	quotatree_free_nodes(tree);		/* then all used and free nodes */
+	kfree(tree);				/* finally the tree itself */
+}
diff -upr linux-2.6.16.46-0.12.orig/fs/vzdq_ugid.c linux-2.6.16.46-0.12-027test011/fs/vzdq_ugid.c
--- linux-2.6.16.46-0.12.orig/fs/vzdq_ugid.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/vzdq_ugid.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,1224 @@
+/*
+ * Copyright (C) 2002 SWsoft
+ * All rights reserved.
+ *
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo UID/GID disk quota implementation
+ */
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/smp_lock.h>
+#include <linux/rcupdate.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/quota.h>
+#include <linux/quotaio_v2.h>
+#include <linux/virtinfo.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/namespace.h>
+#include <linux/vmalloc.h>
+
+#include <linux/vzctl.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+
+/*
+ * XXX
+ * maybe something needs to be done for sb->s_dquot.info[]?
+ */
+
+#define USRQUOTA_MASK		(1 << USRQUOTA)	/* bit for user quota */
+#define GRPQUOTA_MASK		(1 << GRPQUOTA)	/* bit for group quota */
+#define QTYPE2MASK(type)	(1 << (type))	/* bit for any quota type */
+
+static kmem_cache_t *vz_quota_ugid_cachep;	/* slab of struct vz_quota_ugid */
+
+/* Guards vz_quota_master against destruction during quota_on/off; also
+ * protects the lists in the hash table. */
+extern struct semaphore vz_quota_sem;
+
+inline struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid)
+{
+	if (qugid != VZ_QUOTA_UGBAD)	/* the "bad" sentinel is not counted */
+		atomic_inc(&qugid->qugid_count);	/* take one reference */
+	return qugid;	/* returned exactly as passed in */
+}
+
+/* Non-zero when all four limits are zero: such users are not limited. */
+static inline int vzquota_fake_stat(struct dq_stat *stat)
+{
+	return !(stat->bhardlimit != 0 || stat->bsoftlimit != 0 ||
+		 stat->ihardlimit != 0 || stat->isoftlimit != 0);
+}
+
+/* quotatree_free() destructor: returns a ugid record to its slab cache */
+static inline void vzquota_free_qugid(void *ptr)
+{
+	kmem_cache_free(vz_quota_ugid_cachep, ptr);
+}
+
+/*
+ * Drop one reference; destroy the ugid once refcount, limits and usage are 0.
+ * Must be called under qmblk->dq_sem.
+ */
+void vzquota_put_ugid(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid)
+{
+	if (qugid == VZ_QUOTA_UGBAD)	/* sentinel: nothing to release */
+		return;
+	qmblk_data_read_lock(qmblk);
+	if (atomic_dec_and_test(&qugid->qugid_count) &&
+	    (qmblk->dq_flags & VZDQUG_FIXED_SET) == 0 &&
+	    vzquota_fake_stat(&qugid->qugid_stat) &&
+	    qugid->qugid_stat.bcurrent == 0 &&
+	    qugid->qugid_stat.icurrent == 0) {
+		quotatree_remove(QUGID_TREE(qmblk, qugid->qugid_type),
+				qugid->qugid_id);
+		qmblk->dq_ugid_count--;	/* one record fewer in this master */
+		vzquota_free_qugid(qugid);
+	}
+	qmblk_data_read_unlock(qmblk);
+}
+
+/*
+ * Get a ugid block by its index, as if the blocks were held in an array.
+ * In reality there is no array - the blocks form the leaf chain of the tree.
+ * Returns NULL if index is out of range.
+ * The qmblk semaphore is required to protect the tree.
+ */
+static inline struct vz_quota_ugid *
+vzquota_get_byindex(struct vz_quota_master *qmblk, unsigned int index, int type)
+{
+	return quotatree_leaf_byindex(QUGID_TREE(qmblk, type), index);
+}
+
+/*
+ * Get the next element of the ugid "virtual array".
+ * ugid must be in the current array, and the array must not change between
+ * two accesses (guaranteed by "stopped" quota state and quota semaphore).
+ * The qmblk semaphore is required to protect the tree.
+ */
+static inline struct vz_quota_ugid *
+vzquota_get_next(struct vz_quota_master *qmblk, struct vz_quota_ugid *qugid)
+{
+	return quotatree_get_next(QUGID_TREE(qmblk, qugid->qugid_type),
+			qugid->qugid_id);
+}
+
+/*
+ * Find, or unless flags forbid create, a ugid record; requires dq_sem
+ */
+struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
+			unsigned int quota_id, int type, int flags)
+{
+	struct vz_quota_ugid *qugid;
+	struct quotatree_tree *tree;
+	struct quotatree_find_state st;
+
+	tree = QUGID_TREE(qmblk, type);
+	qugid = quotatree_find(tree, quota_id, &st);
+	if (qugid)
+		goto success;
+
+	/* caller does not want alloc */
+	if (flags & VZDQUG_FIND_DONT_ALLOC)
+		goto fail;
+
+	if (flags & VZDQUG_FIND_FAKE)	/* bypasses both checks below */
+		goto doit;
+
+	/* check limit */
+	if (qmblk->dq_ugid_count >= qmblk->dq_ugid_max)
+		goto fail;
+
+	/* see comment at VZDQUG_FIXED_SET define */
+	if (qmblk->dq_flags & VZDQUG_FIXED_SET)
+		goto fail;
+
+doit:
+	/* alloc new structure */
+	qugid = kmem_cache_alloc(vz_quota_ugid_cachep,
+			SLAB_NOFS | __GFP_NOFAIL);
+	if (qugid == NULL)
+		goto fail;
+
+	/* initialize new structure */
+	qugid->qugid_id = quota_id;
+	memset(&qugid->qugid_stat, 0, sizeof(qugid->qugid_stat));
+	qugid->qugid_type = type;
+	atomic_set(&qugid->qugid_count, 0);	/* bumped at "success" */
+
+	/* insert in tree */
+	if (quotatree_insert(tree, quota_id, &st, qugid) < 0)
+		goto fail_insert;
+	qmblk->dq_ugid_count++;
+
+success:
+	vzquota_get_ugid(qugid);	/* found or new: return with a reference */
+	return qugid;
+
+fail_insert:
+	vzquota_free_qugid(qugid);
+fail:
+	return VZ_QUOTA_UGBAD;
+}
+
+/*
+ * Locked wrapper around __vzquota_find_ugid(): takes dq_sem, may schedule.
+ */
+struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
+			unsigned int quota_id, int type, int flags)
+{
+	struct vz_quota_ugid *found;
+
+	down(&qmblk->dq_sem);
+	found = __vzquota_find_ugid(qmblk, quota_id, type, flags);
+	up(&qmblk->dq_sem);
+
+	return found;
+}
+
+/*
+ * Destroy all ugid records (both trees) on the given quota master.
+ */
+void vzquota_kill_ugid(struct vz_quota_master *qmblk)
+{
+	/* the two trees must either both exist or both be absent */
+	BUG_ON((qmblk->dq_uid_tree == NULL) != (qmblk->dq_gid_tree == NULL));
+
+	if (qmblk->dq_uid_tree == NULL)
+		return;
+	quotatree_free(qmblk->dq_uid_tree, vzquota_free_qugid);
+	quotatree_free(qmblk->dq_gid_tree, vzquota_free_qugid);
+}
+
+
+/* ----------------------------------------------------------------------
+ * Management interface to ugid quota for (super)users.
+ * --------------------------------------------------------------------- */
+
+static int vzquota_initialize2(struct inode *inode, int type)
+{
+	return QUOTA_OK;	/* nothing to set up */
+}
+
+static int vzquota_drop2(struct inode *inode)
+{
+	return QUOTA_OK;	/* nothing to tear down */
+}
+
+static int vzquota_alloc_space2(struct inode *inode,
+			     qsize_t number, int prealloc)
+{
+	inode_add_bytes(inode, number);	/* only the inode byte count changes */
+	return QUOTA_OK;	/* never refuses; prealloc is ignored */
+}
+
+static int vzquota_alloc_inode2(const struct inode *inode, unsigned long number)
+{
+	return QUOTA_OK;	/* never refuses */
+}
+
+static int vzquota_free_space2(struct inode *inode, qsize_t number)
+{
+	inode_sub_bytes(inode, number);	/* only the inode byte count changes */
+	return QUOTA_OK;
+}
+
+static int vzquota_free_inode2(const struct inode *inode, unsigned long number)
+{
+	return QUOTA_OK;	/* no accounting done here */
+}
+
+static int vzquota_transfer2(struct inode *inode, struct iattr *iattr)
+{
+	return QUOTA_OK;	/* no accounting done here */
+}
+
+struct dquot_operations vz_quota_operations2 = {	/* always-QUOTA_OK ops */
+	.initialize	= vzquota_initialize2,
+	.drop		= vzquota_drop2,
+	.alloc_space	= vzquota_alloc_space2,
+	.alloc_inode	= vzquota_alloc_inode2,
+	.free_space	= vzquota_free_space2,
+	.free_inode	= vzquota_free_inode2,
+	.transfer	= vzquota_transfer2,
+};
+
+
+asmlinkage long sys_unlink(const char __user * pathname);
+asmlinkage long sys_rename(const char __user * oldname,
+	       const char __user * newname);
+asmlinkage long sys_symlink(const char __user * oldname,
+	       const char __user * newname);
+
+/* Re-create path as a symlink into /proc; called under sb->s_umount */
+static int vz_restore_symlink(struct super_block *sb, char *path, int type)
+{
+	const char *names[] = {
+		[USRQUOTA] = "aquota.user",
+		[GRPQUOTA] = "aquota.group"
+	};
+	mm_segment_t saved_fs;
+	char dest[64];
+	char *tmppath;
+	int err;
+
+	tmppath = kmalloc(strlen(path) + sizeof(".new"), GFP_KERNEL);
+	if (tmppath == NULL)
+		return -ENOMEM;
+
+	/* the link is first built as "<path>.new" and renamed at the end */
+	sprintf(tmppath, "%s.new", path);
+
+	sprintf(dest, "/proc/vz/vzaquota/%08x/%s",
+			new_encode_dev(sb->s_dev), names[type]);
+
+	/*
+	 * Lockdep would learn an unneeded dependency during unlink(2):
+	 *	->s_umount => ->i_mutex/1 => ->i_mutex
+	 * The reverse dependency is
+	 *	open_namei() => ->i_mutex => lookup_hash() => __lookup_hash()
+	 *	=> ->lookup() \eq vzdq_aquotq_lookup() => find_qmblk_by_dev()
+	 *	=> user_get_super() => ->s_umount
+	 * However, the first set of ->i_mutex'es belongs to /, the second to
+	 * /proc.  ->s_umount is therefore released around the calls below.
+	 * The right fix is to get rid of vz_restore_symlink(), of course.
+	 */
+	up_read(&sb->s_umount);
+
+	/* the sys_*() calls take __user pointers, hence set_fs(KERNEL_DS) */
+	/* drop a stale temporary, create the link, then rename it into place */
+	saved_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sys_unlink(tmppath);
+	if (err >= 0 || err == -ENOENT) {	/* a missing ".new" is fine */
+		err = sys_symlink(dest, tmppath);
+		if (err >= 0)
+			err = sys_rename(tmppath, path);
+	}
+	set_fs(saved_fs);
+
+	down_read(&sb->s_umount);
+	/* umounted meanwhile? */
+	if (err == 0 && !sb->s_root)
+		err = -ENODEV;
+
+	kfree(tmppath);
+	return err;
+}
+
+/* Enable one ugid quota type on sb; called under sb->s_umount semaphore */
+static int vz_quota_on(struct super_block *sb, int type,
+		int format_id, char *path)
+{
+	struct vz_quota_master *qmblk;
+	int sb_flag = 0, qm_flag = 0;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	if (qmblk == NULL)
+		return -ESRCH;
+	if (qmblk == VZ_QUOTA_BAD)
+		return -EIO;
+
+	/* restore the quota file symlink first; give up if that fails */
+	err = vz_restore_symlink(sb, path, type);
+	if (err < 0)
+		goto out_put;
+
+	down(&vz_quota_sem);
+	sb->dq_op = &vz_quota_operations2;
+	sb->s_qcop = &vz_quotactl_operations;
+
+	/* any other type value leaves both flags at zero */
+	switch (type) {
+	case USRQUOTA:
+		sb_flag = DQUOT_USR_ENABLED;
+		qm_flag = VZDQ_USRQUOTA;
+		break;
+	case GRPQUOTA:
+		sb_flag = DQUOT_GRP_ENABLED;
+		qm_flag = VZDQ_GRPQUOTA;
+		break;
+	}
+
+	if (qmblk->dq_flags & qm_flag) {
+		err = -EBUSY;	/* this type is already switched on */
+	} else {
+		err = 0;
+		qmblk->dq_flags |= qm_flag;
+		sb->s_dquot.flags |= sb_flag;
+	}
+
+	up(&vz_quota_sem);
+out_put:
+	qmblk_put(qmblk);
+	return err;
+}
+
+static int vz_quota_off(struct super_block *sb, int type)
+{
+	struct vz_quota_master *qmblk;
+	int qm_flag;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	if (qmblk == NULL) {
+		err = -ESRCH;
+		goto out;
+	}
+	if (qmblk == VZ_QUOTA_BAD) {
+		err = -EIO;
+		goto out;
+	}
+
+	/* flag of the requested type; stays 0 for any other type value */
+	qm_flag = type == USRQUOTA ? VZDQ_USRQUOTA :
+		  type == GRPQUOTA ? VZDQ_GRPQUOTA : 0;
+	if (qmblk->dq_flags & qm_flag) {
+		qmblk->dq_flags &= ~qm_flag;
+		err = 0;
+	} else {
+		err = -EINVAL;	/* that type is not switched on */
+	}
+
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+static int vz_quota_sync(struct super_block *sb, int type)
+{
+	return 0;	/* nothing to flush: vz quota is always up to date */
+}
+
+static int vz_get_dqblk(struct super_block *sb, int type,
+		qid_t id, struct if_dqblk *di)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid *ugid;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+	/* a lookup miss is not an error: an all-zero record is reported */
+	err = 0;
+	ugid = vzquota_find_ugid(qmblk, id, type, VZDQUG_FIND_DONT_ALLOC);
+	if (ugid != VZ_QUOTA_UGBAD) {
+		qmblk_data_read_lock(qmblk);
+		di->dqb_bhardlimit = ugid->qugid_stat.bhardlimit >> 10; /* 1K */
+		di->dqb_bsoftlimit = ugid->qugid_stat.bsoftlimit >> 10; /* 1K */
+		di->dqb_curspace = ugid->qugid_stat.bcurrent;
+		di->dqb_ihardlimit = ugid->qugid_stat.ihardlimit;
+		di->dqb_isoftlimit = ugid->qugid_stat.isoftlimit;
+		di->dqb_curinodes = ugid->qugid_stat.icurrent;
+		di->dqb_btime = ugid->qugid_stat.btime;
+		di->dqb_itime = ugid->qugid_stat.itime;
+		qmblk_data_read_unlock(qmblk);
+		di->dqb_valid = QIF_ALL;
+		vzquota_put_ugid(qmblk, ugid);	/* NOTE(review): dq_sem not held */
+	} else {	/* no record exists for this id */
+		memset(di, 0, sizeof(*di));
+		di->dqb_valid = QIF_ALL;
+	}
+
+out:
+	up(&vz_quota_sem);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+/* Set limits and grace times of one ugid; call under vz_quota_sem */
+static int __vz_set_dqblk(struct vz_quota_master *qmblk,
+		int type, qid_t id, struct if_dqblk *di)
+{
+	struct vz_quota_ugid *ugid;
+
+	ugid = vzquota_find_ugid(qmblk, id, type, 0);
+	if (ugid == VZ_QUOTA_UGBAD)
+		return -ESRCH;
+	/* NOTE(review): type also indexes dq_ugid_info[] below, unchecked here */
+	qmblk_data_write_lock(qmblk);
+	/*
+	 * Subtle compatibility breakage.
+	 *
+	 * Some old non-vz kernel quota didn't start grace period
+	 * if the new soft limit happens to be below the usage.
+	 * Non-vz kernel quota in 2.4.20 starts the grace period
+	 * (if it hasn't been started).
+	 * Current non-vz kernel performs even more complicated
+	 * manipulations...
+	 *
+	 * Also, current non-vz kernels have inconsistency related to
+	 * the grace time start.  In regular operations the grace period
+	 * is started if the usage is greater than the soft limit (and,
+	 * strangely, is cancelled if the usage is less).
+	 * However, set_dqblk starts the grace period if the usage is greater
+	 * or equal to the soft limit.
+	 *
+	 * Here we try to mimic the behavior of the current non-vz kernel.
+	 */
+	if (di->dqb_valid & QIF_BLIMITS) {
+		ugid->qugid_stat.bhardlimit =
+			(__u64)di->dqb_bhardlimit << 10;	/* 1K -> bytes */
+		ugid->qugid_stat.bsoftlimit =
+			(__u64)di->dqb_bsoftlimit << 10;	/* 1K -> bytes */
+		if (di->dqb_bsoftlimit == 0 ||
+		    ugid->qugid_stat.bcurrent < ugid->qugid_stat.bsoftlimit)
+			ugid->qugid_stat.btime = 0;	/* no grace running */
+		else if (!(di->dqb_valid & QIF_BTIME))
+			ugid->qugid_stat.btime = CURRENT_TIME_SECONDS
+				+ qmblk->dq_ugid_info[type].bexpire;
+		else
+			ugid->qugid_stat.btime = di->dqb_btime;
+	}
+	if (di->dqb_valid & QIF_ILIMITS) {
+		ugid->qugid_stat.ihardlimit = di->dqb_ihardlimit;
+		ugid->qugid_stat.isoftlimit = di->dqb_isoftlimit;
+		if (di->dqb_isoftlimit == 0 ||
+		    ugid->qugid_stat.icurrent < ugid->qugid_stat.isoftlimit)
+			ugid->qugid_stat.itime = 0;	/* no grace running */
+		else if (!(di->dqb_valid & QIF_ITIME))
+			ugid->qugid_stat.itime = CURRENT_TIME_SECONDS
+				+ qmblk->dq_ugid_info[type].iexpire;
+		else
+			ugid->qugid_stat.itime = di->dqb_itime;
+	}
+	qmblk_data_write_unlock(qmblk);
+	vzquota_put_ugid(qmblk, ugid);
+
+	return 0;
+}
+
+static int vz_set_dqblk(struct super_block *sb, int type,
+		qid_t id, struct if_dqblk *di)
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	/* resolve sb's quota master, then delegate under vz_quota_sem */
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	if (qmblk == NULL)
+		err = -ESRCH;
+	else if (qmblk == VZ_QUOTA_BAD)
+		err = -EIO;
+	else
+		err = __vz_set_dqblk(qmblk, type, id, di);
+	up(&vz_quota_sem);
+
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+static int vz_get_dqinfo(struct super_block *sb, int type,
+		struct if_dqinfo *ii)
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	/* resolve sb's quota master, then read it under vz_quota_sem */
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	if (qmblk == NULL) {
+		err = -ESRCH;
+	} else if (qmblk == VZ_QUOTA_BAD) {
+		err = -EIO;
+	} else {
+		/* report this type's grace periods; flags always read as 0 */
+		ii->dqi_bgrace = qmblk->dq_ugid_info[type].bexpire;
+		ii->dqi_igrace = qmblk->dq_ugid_info[type].iexpire;
+		ii->dqi_flags = 0;
+		ii->dqi_valid = IIF_ALL;
+		err = 0;
+	}
+	up(&vz_quota_sem);
+
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+/* Apply the grace times flagged in ii->dqi_valid; call under vz_quota_sem */
+static int __vz_set_dqinfo(struct vz_quota_master *qmblk,
+		int type, struct if_dqinfo *ii)
+{
+	/* a request that sets any DQF_MASK flag is rejected as a whole */
+	if ((ii->dqi_valid & IIF_FLAGS) && (ii->dqi_flags & DQF_MASK))
+		return -EINVAL;
+
+	if (ii->dqi_valid & IIF_BGRACE)
+		qmblk->dq_ugid_info[type].bexpire = ii->dqi_bgrace;
+	if (ii->dqi_valid & IIF_IGRACE)
+		qmblk->dq_ugid_info[type].iexpire = ii->dqi_igrace;
+	return 0;
+}
+
+static int vz_set_dqinfo(struct super_block *sb, int type,
+		struct if_dqinfo *ii)
+{
+	struct vz_quota_master *qmblk;
+	int err;
+
+	/* resolve sb's quota master, then delegate under vz_quota_sem */
+	qmblk = vzquota_find_qmblk(sb);
+	down(&vz_quota_sem);
+	if (qmblk == NULL)
+		err = -ESRCH;
+	else if (qmblk == VZ_QUOTA_BAD)
+		err = -EIO;
+	else
+		err = __vz_set_dqinfo(qmblk, type, ii);
+	up(&vz_quota_sem);
+
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+	return err;
+}
+
+#ifdef CONFIG_QUOTA_COMPAT
+
+#define Q_GETQUOTI_SIZE 1024	/* max records per vz_get_quoti() call */
+/* copy limits, usage and grace times from a ugid (src) into dst */
+#define UGID2DQBLK(dst, src)						\
+	do {								\
+		(dst)->dqb_ihardlimit = (src)->qugid_stat.ihardlimit;	\
+		(dst)->dqb_isoftlimit = (src)->qugid_stat.isoftlimit;	\
+		(dst)->dqb_curinodes = (src)->qugid_stat.icurrent;	\
+		/* in 1K blocks */					\
+		(dst)->dqb_bhardlimit = (src)->qugid_stat.bhardlimit >> 10; \
+		/* in 1K blocks */					\
+		(dst)->dqb_bsoftlimit = (src)->qugid_stat.bsoftlimit >> 10; \
+		/* in bytes, 64 bit */					\
+		(dst)->dqb_curspace = (src)->qugid_stat.bcurrent;	\
+		(dst)->dqb_btime = (src)->qugid_stat.btime;		\
+		(dst)->dqb_itime = (src)->qugid_stat.itime;		\
+	} while (0)
+
+static int vz_get_quoti(struct super_block *sb, int type, qid_t idx,
+		struct v2_disk_dqblk __user *dqblk)
+{
+	struct vz_quota_master *qmblk;
+	struct v2_disk_dqblk *data, *kbuf;
+	struct vz_quota_ugid *ugid;
+	int count;
+	int err;
+
+	qmblk = vzquota_find_qmblk(sb);
+	err = -ESRCH;
+	if (qmblk == NULL)
+		goto out;
+	err = -EIO;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out;
+	/* records are gathered in a kernel buffer, copied out after unlock */
+	err = -ENOMEM;
+	kbuf = vmalloc(Q_GETQUOTI_SIZE * sizeof(*kbuf));
+	if (!kbuf)
+		goto out;
+
+	down(&vz_quota_sem);
+	down(&qmblk->dq_sem);
+	for (ugid = vzquota_get_byindex(qmblk, idx, type), count = 0;
+		ugid != NULL && count < Q_GETQUOTI_SIZE;
+		count++)
+	{
+		data = kbuf + count;
+		qmblk_data_read_lock(qmblk);
+		UGID2DQBLK(data, ugid);
+		qmblk_data_read_unlock(qmblk);
+		data->dqb_id = ugid->qugid_id;
+
+		/* Find next entry */
+		ugid = vzquota_get_next(qmblk, ugid);
+		BUG_ON(ugid != NULL && ugid->qugid_type != type);
+	}
+	up(&qmblk->dq_sem);
+	up(&vz_quota_sem);
+	/* result: number of records gathered, or -EFAULT if copy-out fails */
+	err = count;
+	if (copy_to_user(dqblk, kbuf, count * sizeof(*kbuf)))
+		err = -EFAULT;
+
+	vfree(kbuf);
+out:
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qmblk);
+
+	return err;
+}
+
+#endif
+
+struct quotactl_ops vz_quotactl_operations = {	/* installed as sb->s_qcop */
+	.quota_on	= vz_quota_on,
+	.quota_off	= vz_quota_off,
+	.quota_sync	= vz_quota_sync,
+	.get_info	= vz_get_dqinfo,
+	.set_info	= vz_set_dqinfo,
+	.get_dqblk	= vz_get_dqblk,
+	.set_dqblk	= vz_set_dqblk,
+#ifdef CONFIG_QUOTA_COMPAT
+	.get_quoti	= vz_get_quoti,
+#endif
+};
+
+
+/* ----------------------------------------------------------------------
+ * Management interface for host system admins.
+ * --------------------------------------------------------------------- */
+
+static int quota_ugid_addstat(unsigned int quota_id, unsigned int ugid_size,
+		struct vz_quota_iface __user *u_ugid_buf, int compat)
+{
+	struct vz_quota_master *qmblk;
+	int ret;
+
+	down(&vz_quota_sem);
+
+	ret = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	ret = -EBUSY;
+	if (qmblk->dq_state != VZDQ_STARTING)
+		goto out; /* working quota doesn't accept new ugids */
+
+	ret = 0;
+	/* add ugids one by one; ret counts the entries accepted so far */
+	for (ret = 0; ret < ugid_size; ret++) {
+		struct vz_quota_iface ugid_buf;
+		struct vz_quota_ugid *ugid;
+
+		if (!compat) {
+			if (copy_from_user(&ugid_buf, u_ugid_buf,
+							sizeof(ugid_buf)))
+				break;	/* stop; ret = entries done */
+			u_ugid_buf++; /* next user buffer */
+		} else { /* NOTE(review): ugid_buf stays unset w/o CONFIG_COMPAT */
+#ifdef CONFIG_COMPAT
+			struct compat_vz_quota_iface oqif;
+			if (copy_from_user(&oqif, u_ugid_buf,
+							sizeof(oqif)))
+				break;	/* stop; ret = entries done */
+			ugid_buf.qi_id = oqif.qi_id;
+			ugid_buf.qi_type = oqif.qi_type;
+			compat_dqstat2dqstat(&oqif.qi_stat, &ugid_buf.qi_stat);
+			u_ugid_buf = (struct vz_quota_iface __user *)
+					(((void *)u_ugid_buf) + sizeof(oqif));
+#endif
+		}
+
+		if (ugid_buf.qi_type >= MAXQUOTAS)
+			break; /* bad quota type - this is the only check */
+
+		ugid = vzquota_find_ugid(qmblk,
+				ugid_buf.qi_id, ugid_buf.qi_type, 0);
+		if (ugid == VZ_QUOTA_UGBAD) {
+			qmblk->dq_flags |= VZDQUG_FIXED_SET;
+			break; /* limit reached */
+		}
+
+		/* update usage/limits
+		 * we can copy the data without the lock, because the data
+		 * cannot be modified in VZDQ_STARTING state */
+		ugid->qugid_stat = ugid_buf.qi_stat;
+
+		vzquota_put_ugid(qmblk, ugid);
+	}
+out:
+	up(&vz_quota_sem);
+
+	return ret;
+}
+
+static int quota_ugid_setgrace(unsigned int quota_id,
+		struct dq_info __user u_dq_info[], int compat)
+{
+	struct vz_quota_master *qmblk;
+	struct dq_info dq_info[MAXQUOTAS];
+	struct dq_info *target;
+	int err, type;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EBUSY;
+	if (qmblk->dq_state != VZDQ_STARTING)
+		goto out; /* working quota doesn't accept changing options */
+
+	err = -EFAULT;
+	if (!compat) {
+		if (copy_from_user(dq_info, u_dq_info, sizeof(dq_info)))
+			goto out;
+	} else { /* NOTE(review): dq_info stays unset without CONFIG_COMPAT */
+#ifdef CONFIG_COMPAT
+		struct compat_dq_info odqi[MAXQUOTAS];
+		if (copy_from_user(odqi, u_dq_info, sizeof(odqi)))
+			goto out;
+		for (type = 0; type < MAXQUOTAS; type++)
+			compat_dqinfo2dqinfo(&odqi[type], &dq_info[type]);
+#endif
+	}
+
+	err = 0;
+
+	/* update in qmblk: only the two expire times are taken over */
+	for (type = 0; type < MAXQUOTAS; type++) {
+		target = &qmblk->dq_ugid_info[type];
+		target->bexpire = dq_info[type].bexpire;
+		target->iexpire = dq_info[type].iexpire;
+	}
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int do_quota_ugid_getstat(struct vz_quota_master *qmblk, int index, int size,
+		struct vz_quota_iface *u_ugid_buf)
+{
+	int type, count;
+	struct vz_quota_ugid *ugid;
+
+	if (QTREE_LEAFNUM(qmblk->dq_uid_tree) +
+	    QTREE_LEAFNUM(qmblk->dq_gid_tree)
+	    		<= index)
+		return 0;	/* index is past the last record of both trees */
+	/* NOTE: despite its name, u_ugid_buf is filled with memcpy() below */
+	count = 0;
+
+	type = index < QTREE_LEAFNUM(qmblk->dq_uid_tree) ? USRQUOTA : GRPQUOTA;
+	if (type == GRPQUOTA)
+		index -= QTREE_LEAFNUM(qmblk->dq_uid_tree);
+
+	/* loop through the uid records first, then the gid records */
+repeat:
+	for (ugid = vzquota_get_byindex(qmblk, index, type);
+		ugid != NULL && count < size;
+		ugid = vzquota_get_next(qmblk, ugid), count++)
+	{
+		struct vz_quota_iface ugid_buf;
+
+		/* form the interface record and store it in the output buffer */
+		qmblk_data_read_lock(qmblk);
+		memcpy(&ugid_buf.qi_stat, &ugid->qugid_stat,
+				sizeof(ugid_buf.qi_stat));
+		qmblk_data_read_unlock(qmblk);
+		ugid_buf.qi_id = ugid->qugid_id;
+		ugid_buf.qi_type = ugid->qugid_type;
+
+		memcpy(u_ugid_buf, &ugid_buf, sizeof(ugid_buf));
+		u_ugid_buf++; /* next slot of the output buffer */
+	}
+
+	if (type == USRQUOTA && count < size) {
+		type = GRPQUOTA;	/* room left: continue with gid records */
+		index = 0;
+		goto repeat;
+	}
+
+	return count;
+}
+
+static int quota_ugid_getstat(unsigned int quota_id,
+		int index, int size, struct vz_quota_iface __user *u_ugid_buf,
+		int compat)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_iface *k_ugid_buf;
+	int err;
+
+	if (index < 0 || size < 0)
+		return -EINVAL;
+	/* keep size * sizeof() below within the range of an int */
+	if (size > INT_MAX / sizeof(struct vz_quota_iface))
+		return -EINVAL;
+	/* NOTE(review): size == 0 probably ends in -ENOMEM here - confirm */
+	k_ugid_buf = vmalloc(size * sizeof(struct vz_quota_iface));
+	if (k_ugid_buf == NULL)
+		return -ENOMEM;
+
+	down(&vz_quota_sem);
+
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	down(&qmblk->dq_sem);
+	err = do_quota_ugid_getstat(qmblk, index, size, k_ugid_buf);
+	up(&qmblk->dq_sem);
+	if (err < 0)
+		goto out;
+	/* err now holds the number of records placed in k_ugid_buf */
+	if (!compat) {
+		if (copy_to_user(u_ugid_buf, k_ugid_buf,
+					err * sizeof(struct vz_quota_iface)))
+			err = -EFAULT;
+	} else {
+#ifdef CONFIG_COMPAT
+		struct compat_vz_quota_iface oqif;
+		int i;
+		for (i = 0; i < err; i++) {
+			oqif.qi_id = k_ugid_buf[i].qi_id;
+			oqif.qi_type = k_ugid_buf[i].qi_type;
+			dqstat2compat_dqstat(&k_ugid_buf[i].qi_stat,
+					  &oqif.qi_stat);
+			if (copy_to_user(u_ugid_buf, &oqif, sizeof(oqif)))
+				err = -EFAULT;	/* also ends the loop: i < err */
+			u_ugid_buf = (struct vz_quota_iface __user *)
+					(((void *)u_ugid_buf) + sizeof(oqif));
+		}
+#endif
+	}
+
+out:
+	up(&vz_quota_sem);
+	vfree(k_ugid_buf);
+	return err;
+}
+
+static int quota_ugid_getgrace(unsigned int quota_id,
+		struct dq_info __user u_dq_info[], int compat)
+{
+	struct vz_quota_master *qmblk;
+	struct dq_info dq_info[MAXQUOTAS];
+	struct dq_info *target;
+	int err, type;
+
+	/* Report the per-type grace settings of quota_id to user space */
+	down(&vz_quota_sem);
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+	/* zero the array: copy_to_user() below also sends any padding bytes */
+	memset(dq_info, 0, sizeof(dq_info));
+	err = 0;	/* then fill the array in from qmblk */
+	for (type = 0; type < MAXQUOTAS; type++) {
+		target = &qmblk->dq_ugid_info[type];
+		dq_info[type].bexpire = target->bexpire;
+		dq_info[type].iexpire = target->iexpire;
+		dq_info[type].flags = target->flags;
+	}
+
+	if (!compat) {
+		if (copy_to_user(u_dq_info, dq_info, sizeof(dq_info)))
+			err = -EFAULT;
+	} else {
+#ifdef CONFIG_COMPAT
+		struct compat_dq_info odqi[MAXQUOTAS];
+		for (type = 0; type < MAXQUOTAS; type++)
+			dqinfo2compat_dqinfo(&dq_info[type], &odqi[type]);
+		if (copy_to_user(u_dq_info, odqi, sizeof(odqi)))
+			err = -EFAULT;
+#endif
+	}
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int quota_ugid_getconfig(unsigned int quota_id,
+		struct vz_quota_ugid_stat __user *info)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_stat kinfo;
+	int err;
+
+	down(&vz_quota_sem);
+
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* snapshot the ugid configuration, then hand it to user space */
+	kinfo.limit = qmblk->dq_ugid_max;
+	kinfo.count = qmblk->dq_ugid_count;
+	kinfo.flags = qmblk->dq_flags;
+	err = copy_to_user(info, &kinfo, sizeof(kinfo)) ? -EFAULT : 0;
+
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int quota_ugid_setconfig(unsigned int quota_id,
+		struct vz_quota_ugid_stat __user *info)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_stat kinfo;
+	int err;
+
+	down(&vz_quota_sem);
+	/* kinfo.count is not used: only limit (and maybe flags) are applied */
+	err = -ENOENT;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (copy_from_user(&kinfo, info, sizeof(kinfo)))
+		goto out;
+
+	err = 0;
+	qmblk->dq_ugid_max = kinfo.limit;	/* applied in every state */
+	if (qmblk->dq_state == VZDQ_STARTING) {	/* flags: only while starting */
+		qmblk->dq_flags = kinfo.flags;
+		if (qmblk->dq_flags & VZDQUG_ON)
+			qmblk->dq_flags |= VZDQ_USRQUOTA | VZDQ_GRPQUOTA;
+	}
+
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int quota_ugid_setlimit(unsigned int quota_id,
+		struct vz_quota_ugid_setlimit __user *u_lim)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_setlimit lim;
+	int err;
+
+	/* Set one ugid's limits on quota quota_id from the user's request */
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+	err = -EFAULT;
+	if (copy_from_user(&lim, u_lim, sizeof(lim)))
+		goto out;
+	/* lim.type comes from user space and indexes dq_ugid_info[] */
+	err = -EINVAL;
+	if ((unsigned int)lim.type < MAXQUOTAS)
+		err = __vz_set_dqblk(qmblk, lim.type, lim.id, &lim.dqb);
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+static int quota_ugid_setinfo(unsigned int quota_id,
+		struct vz_quota_ugid_setinfo __user *u_info)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid_setinfo info;
+	int err;
+
+	/* Set the grace times of one quota type from the user's request */
+	down(&vz_quota_sem);
+	err = -ESRCH;
+	qmblk = vzquota_find_master(quota_id);
+	if (qmblk == NULL)
+		goto out;
+	err = -EFAULT;
+	if (copy_from_user(&info, u_info, sizeof(info)))
+		goto out;
+	/* info.type comes from user space and indexes dq_ugid_info[] */
+	err = -EINVAL;
+	if ((unsigned int)info.type < MAXQUOTAS)
+		err = __vz_set_dqinfo(qmblk, info.type, &info.dqi);
+out:
+	up(&vz_quota_sem);
+
+	return err;
+}
+
+/*
+ * Dispatcher for the UGID quota management commands (VZ_DQ_UGID_*).
+ * Note this call is allowed to run ONLY from VE0.
+ */
+long do_vzquotaugidctl(int cmd, unsigned int quota_id,
+		unsigned int ugid_index, unsigned int ugid_size,
+		void *addr, int compat)
+{
+	int ret;
+	/* addr is cast to a __user pointer whose type depends on cmd */
+	ret = -EPERM;
+	/* both capabilities are required (intent: only root of VE0) */
+	if (!capable(CAP_SYS_RESOURCE) ||
+	    !capable(CAP_SYS_ADMIN))
+		goto out;
+
+	switch (cmd) {
+		case VZ_DQ_UGID_GETSTAT:
+			ret = quota_ugid_getstat(quota_id,
+					ugid_index, ugid_size,
+				       	(struct vz_quota_iface __user *)addr,
+					compat);
+			break;
+		case VZ_DQ_UGID_ADDSTAT:
+			ret = quota_ugid_addstat(quota_id, ugid_size,
+					(struct vz_quota_iface __user *) addr,
+					compat);
+			break;
+		case VZ_DQ_UGID_GETGRACE:
+			ret = quota_ugid_getgrace(quota_id,
+					(struct dq_info __user *)addr, compat);
+			break;
+		case VZ_DQ_UGID_SETGRACE:
+			ret = quota_ugid_setgrace(quota_id,
+					(struct dq_info __user *)addr, compat);
+			break;
+		case VZ_DQ_UGID_GETCONFIG:
+			ret = quota_ugid_getconfig(quota_id,
+					(struct vz_quota_ugid_stat __user *)
+								addr);
+			break;
+		case VZ_DQ_UGID_SETCONFIG:
+			ret = quota_ugid_setconfig(quota_id,
+					(struct vz_quota_ugid_stat __user *)
+								addr);
+			break;
+		case VZ_DQ_UGID_SETLIMIT:
+			ret = quota_ugid_setlimit(quota_id,
+					(struct vz_quota_ugid_setlimit __user *)
+								addr);
+			break;
+		case VZ_DQ_UGID_SETINFO:
+			ret = quota_ugid_setinfo(quota_id,
+					(struct vz_quota_ugid_setinfo __user *)
+								addr);
+			break;
+		default:	/* unknown command */
+			ret = -EINVAL;
+			goto out;
+	}
+out:
+	return ret;
+}
+
+static void ugid_quota_on_sb(struct super_block *sb)
+{
+	struct super_block *real_sb;
+	struct vz_quota_master *qmblk;
+
+	if (!sb->s_op->get_quota_root)
+		return;
+	/* act only if the quota root's superblock uses vz_quota_operations */
+	real_sb = sb->s_op->get_quota_root(sb)->i_sb;
+	if (real_sb->dq_op != &vz_quota_operations)
+		return;
+
+	sb->dq_op = &vz_quota_operations2;
+	sb->s_qcop = &vz_quotactl_operations;
+	INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+	INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+	sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
+	sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
+	/* mirror the master's enabled quota types into sb->s_dquot.flags */
+	qmblk = vzquota_find_qmblk(sb);
+	if ((qmblk == NULL) || (qmblk == VZ_QUOTA_BAD))
+		return;
+	down(&vz_quota_sem);
+	if (qmblk->dq_flags & VZDQ_USRQUOTA)
+		sb->s_dquot.flags |= DQUOT_USR_ENABLED;
+	if (qmblk->dq_flags & VZDQ_GRPQUOTA)
+		sb->s_dquot.flags |= DQUOT_GRP_ENABLED;
+	up(&vz_quota_sem);
+	qmblk_put(qmblk);
+}
+
+static void ugid_quota_off_sb(struct super_block *sb)
+{
+	/* quota cannot be turned off on a super block that is still mounted */
+	BUG_ON(sb->s_root != NULL);
+}
+
+static int ugid_notifier_call(struct vnotifier_block *self,
+		unsigned long n, void *data, int old_ret)
+{
+	struct virt_info_quota *viq;
+
+	viq = (struct virt_info_quota *)data;
+
+	switch (n) {
+	case VIRTINFO_QUOTA_ON:
+		ugid_quota_on_sb(viq->super);
+		break;
+	case VIRTINFO_QUOTA_OFF:
+		ugid_quota_off_sb(viq->super);
+		break;
+	case VIRTINFO_QUOTA_GETSTAT:	/* acknowledged, nothing to do */
+		break;
+	default:	/* not our event: pass the previous result through */
+		return old_ret;
+	}
+	return NOTIFY_OK;
+}
+
+static struct vnotifier_block ugid_notifier_block = {	/* for VITYPE_QUOTA */
+	.notifier_call = ugid_notifier_call,
+};
+
+/* ----------------------------------------------------------------------
+ * Init/exit.
+ * --------------------------------------------------------------------- */
+
+int vzquota_ugid_init(void)
+{
+	int err;
+	/* set up, in order: slab cache, quota format, quota notifier */
+	vz_quota_ugid_cachep = kmem_cache_create("vz_quota_ugid",
+				      sizeof(struct vz_quota_ugid),
+				      0, SLAB_HWCACHE_ALIGN,
+				      NULL, NULL);
+	if (vz_quota_ugid_cachep == NULL)
+		goto err_slab;
+
+	err = register_quota_format(&vz_quota_empty_v2_format);
+	if (err)
+		goto err_reg;
+
+	virtinfo_notifier_register(VITYPE_QUOTA, &ugid_notifier_block);
+	return 0;
+
+err_reg:	/* undo the cache creation */
+	kmem_cache_destroy(vz_quota_ugid_cachep);
+	return err;
+
+err_slab:
+	printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
+	return -ENOMEM;
+}
+
+void vzquota_ugid_release(void)
+{
+	virtinfo_notifier_unregister(VITYPE_QUOTA, &ugid_notifier_block);
+	unregister_quota_format(&vz_quota_empty_v2_format);
+	/* reverse order of vzquota_ugid_init(): the slab cache goes last */
+	if (kmem_cache_destroy(vz_quota_ugid_cachep))
+		printk(KERN_ERR "VZQUOTA: kmem_cache_destroy failed\n");
+}
diff -upr linux-2.6.16.46-0.12.orig/fs/vzdquot.c linux-2.6.16.46-0.12-027test011/fs/vzdquot.c
--- linux-2.6.16.46-0.12.orig/fs/vzdquot.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/vzdquot.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,1831 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
+ * All rights reserved.
+ *
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains the core of Virtuozzo disk quota implementation:
+ * maintenance of VZDQ information in inodes,
+ * external interfaces,
+ * module entry.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <asm/atomic.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/quota.h>
+#include <linux/rcupdate.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/vzctl.h>
+#include <linux/vzctl_quota.h>
+#include <linux/vzquota.h>
+#include <linux/virtinfo.h>
+#include <linux/vzdq_tree.h>
+
+/* ----------------------------------------------------------------------
+ *
+ * Locking
+ *
+ * ---------------------------------------------------------------------- */
+
+/*
+ * Serializes on/off and all other do_vzquotactl operations.
+ * Protects qmblk hash.
+ */
+struct semaphore vz_quota_sem;
+
+/*
+ * Data access locks
+ *  inode_qmblk
+ *	protects qmblk pointers in all inodes and qlnk content in general
+ *	(but not qmblk content);
+ *	also protects related qmblk invalidation procedures;
+ *	can't be per-inode because of vzquota_dtree_qmblk complications
+ *	and problems with serialization with quota_on,
+ *	but can be per-superblock;
+ *  qmblk_data
+ *	protects qmblk fields (such as current usage)
+ *  quota_data
+ *	protects charge/uncharge operations, thus, implies
+ *	qmblk_data lock and, if CONFIG_VZ_QUOTA_UGID, inode_qmblk lock
+ *	(to protect ugid pointers).
+ *
+ * Lock order:
+ *  inode_qmblk_lock -> dcache_lock
+ *  inode_qmblk_lock -> qmblk_data
+ */
+static spinlock_t vzdq_qmblk_lock = SPIN_LOCK_UNLOCKED;
+
+inline void inode_qmblk_lock(struct super_block *sb)
+{
+	spin_lock(&vzdq_qmblk_lock);
+}
+
+inline void inode_qmblk_unlock(struct super_block *sb)
+{
+	spin_unlock(&vzdq_qmblk_lock);
+}
+
+inline void qmblk_data_read_lock(struct vz_quota_master *qmblk)
+{
+	spin_lock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_read_unlock(struct vz_quota_master *qmblk)
+{
+	spin_unlock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_write_lock(struct vz_quota_master *qmblk)
+{
+	spin_lock(&qmblk->dq_data_lock);
+}
+
+inline void qmblk_data_write_unlock(struct vz_quota_master *qmblk)
+{
+	spin_unlock(&qmblk->dq_data_lock);
+}
+
+struct quota_format_type vz_quota_empty_v2_format = {
+	.qf_fmt_id	= QFMT_VFS_V0,
+	.qf_ops		= NULL,
+	.qf_owner	= THIS_MODULE,
+};
+
+/* ----------------------------------------------------------------------
+ *
+ * Master hash table handling.
+ *
+ * Not SMP safe, serialized by vz_quota_sem within quota syscalls
+ *
+ * --------------------------------------------------------------------- */
+
+static kmem_cache_t *vzquota_cachep;
+
+/*
+ * Hash function.
+ */
+#define QHASH_BITS		6
+#define	VZ_QUOTA_HASH_SIZE	(1 << QHASH_BITS)
+#define QHASH_MASK		(VZ_QUOTA_HASH_SIZE - 1)
+
+struct list_head vzquota_hash_table[VZ_QUOTA_HASH_SIZE];
+int vzquota_hash_size = VZ_QUOTA_HASH_SIZE;
+
+static inline int vzquota_hash_func(unsigned int qid)
+{
+	return (qid ^ (qid >> QHASH_BITS)) & QHASH_MASK;
+}
+
+/**
+ * vzquota_alloc_master - alloc and instantiate master quota record
+ *
+ * Returns:
+ *	pointer to newly created record if SUCCESS
+ *	-ENOMEM if out of memory
+ *	-EEXIST if record with given quota_id already exist
+ */
+struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
+		struct vz_quota_stat *qstat)
+{
+	int err;
+	struct vz_quota_master *qmblk;
+
+	err = -EEXIST;
+	if (vzquota_find_master(quota_id) != NULL)
+		goto out;
+	/* every failure from here on is an allocation failure */
+	err = -ENOMEM;
+	qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
+	if (qmblk == NULL)
+		goto out;
+#ifdef CONFIG_VZ_QUOTA_UGID
+	qmblk->dq_uid_tree = quotatree_alloc();
+	if (!qmblk->dq_uid_tree)
+		goto out_free;
+
+	qmblk->dq_gid_tree = quotatree_alloc();
+	if (!qmblk->dq_gid_tree)
+		goto out_free_tree;
+#endif
+
+	qmblk->dq_state = VZDQ_STARTING;
+	init_MUTEX(&qmblk->dq_sem);
+	spin_lock_init(&qmblk->dq_data_lock);
+	/* dq_stat and dq_info are copied from @qstat */
+	qmblk->dq_id = quota_id;
+	qmblk->dq_stat = qstat->dq_stat;
+	qmblk->dq_info = qstat->dq_info;
+	qmblk->dq_root_dentry = NULL;
+	qmblk->dq_root_mnt = NULL;
+	qmblk->dq_sb = NULL;
+	qmblk->dq_ugid_count = 0;
+	qmblk->dq_ugid_max = 0;
+	qmblk->dq_flags = 0;
+	memset(qmblk->dq_ugid_info, 0, sizeof(qmblk->dq_ugid_info));
+	INIT_LIST_HEAD(&qmblk->dq_ilink_list);
+	/* the new record starts with a reference count of one */
+	atomic_set(&qmblk->dq_count, 1);
+
+	/* insert in hash chain (the hash is protected by vz_quota_sem) */
+	list_add(&qmblk->dq_hash,
+		&vzquota_hash_table[vzquota_hash_func(quota_id)]);
+
+	/* success */
+	return qmblk;
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+out_free_tree:	/* gid tree allocation failed: release the uid tree */
+	quotatree_free(qmblk->dq_uid_tree, NULL);
+out_free:
+	kmem_cache_free(vzquota_cachep, qmblk);
+#endif
+out:
+	return ERR_PTR(err);
+}
+
+static struct vz_quota_master *vzquota_alloc_fake(void)
+{
+	struct vz_quota_master *fake;
+
+	fake = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
+	if (fake != NULL) {
+		memset(fake, 0, sizeof(*fake));
+		fake->dq_state = VZDQ_STOPING;
+		fake->dq_flags = VZDQ_NOQUOT;
+		spin_lock_init(&fake->dq_data_lock);
+		INIT_LIST_HEAD(&fake->dq_ilink_list);
+		atomic_set(&fake->dq_count, 1);
+	}
+	return fake;
+}
+
+/**
+ * vzquota_find_master - find master record with given id
+ *
+ * Returns qmblk without touching its refcounter.
+ * Called under vz_quota_sem.
+ */
+struct vz_quota_master *vzquota_find_master(unsigned int quota_id)
+{
+	struct list_head *chain;
+	struct vz_quota_master *qmblk;
+
+	chain = &vzquota_hash_table[vzquota_hash_func(quota_id)];
+	list_for_each_entry(qmblk, chain, dq_hash) {
+		if (qmblk->dq_id == quota_id)
+			return qmblk;
+	}
+	return NULL;
+}
+
+/**
+ * vzquota_free_master - release resources taken by qmblk, freeing memory
+ *
+ * qmblk is assumed to be already taken out from the hash.
+ * Should be called outside vz_quota_sem.
+ */
+void vzquota_free_master(struct vz_quota_master *qmblk)
+{
+#ifdef CONFIG_VZ_QUOTA_UGID
+	vzquota_kill_ugid(qmblk);
+#endif
+	BUG_ON(!list_empty(&qmblk->dq_ilink_list));
+	kmem_cache_free(vzquota_cachep, qmblk);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Passing quota information through current
+ *
+ * Used in inode -> qmblk lookup at inode creation stage (since at that
+ * time there are no links between the inode being created and its parent
+ * directory).
+ *
+ * --------------------------------------------------------------------- */
+
+#define VZDQ_CUR_MAGIC	0x57d0fee2
+
+static inline int vzquota_cur_qmblk_check(void)
+{
+	return current->magic == VZDQ_CUR_MAGIC;
+}
+
+static inline struct inode *vzquota_cur_qmblk_fetch(void)
+{
+	return current->ino;
+}
+
+static inline void vzquota_cur_qmblk_set(struct inode *data)
+{
+	struct task_struct *tsk;
+
+	tsk = current;
+	tsk->magic = VZDQ_CUR_MAGIC;
+	tsk->ino = data;
+}
+
+#if 0
+static inline void vzquota_cur_qmblk_reset(void)
+{
+	current->magic = 0;
+}
+#endif
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Superblock quota operations
+ *
+ * --------------------------------------------------------------------- */
+
+/*
+ * Kernel structure abuse.
+ * We use files[0] pointer as an int variable:
+ * reference counter of how many quota blocks uses this superblock.
+ * files[1] is used for generations structure which helps us to track
+ * when traversing of dentries is really required.
+ */
+#define __VZ_QUOTA_NOQUOTA(sb)		sb->s_dquot.vzdq_master
+#define __VZ_QUOTA_TSTAMP(sb)		((struct timeval *)\
+						&sb->s_dquot.dqio_sem)
+
+#if defined(VZ_QUOTA_UNLOAD)
+
+#define __VZ_QUOTA_SBREF(sb)		sb->s_dquot.vzdq_count
+
+struct dquot_operations *orig_dq_op;
+struct quotactl_ops *orig_dq_cop;
+
+/**
+ * vzquota_get_super - account for a new quoted tree under the superblock
+ *
+ * One superblock can have multiple directory subtrees with different VZ
+ * quotas.  We keep a counter of such subtrees and set VZ quota operations or
+ * reset the default ones.
+ *
+ * Called under vz_quota_sem (from quota_on).
+ */
+int vzquota_get_super(struct super_block *sb)
+{
+	if (sb->dq_op != &vz_quota_operations) {
+		down(&sb->s_dquot.dqonoff_sem);
+		if (sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) {
+			up(&sb->s_dquot.dqonoff_sem);
+			return -EEXIST;
+		}
+		if (orig_dq_op == NULL && sb->dq_op != NULL)
+			orig_dq_op = sb->dq_op;
+		sb->dq_op = &vz_quota_operations;
+		if (orig_dq_cop == NULL && sb->s_qcop != NULL)
+			orig_dq_cop = sb->s_qcop;
+		/* XXX this may race with sys_quotactl */
+#ifdef CONFIG_VZ_QUOTA_UGID
+		sb->s_qcop = &vz_quotactl_operations;
+#else
+		sb->s_qcop = NULL;
+#endif
+		do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
+		sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
+		/*
+		 * To get quotaops.h call us we need to mark superblock
+		 * as having quota.  These flags mark the moment when
+		 * our dq_op start to be called.
+		 *
+		 * The ordering of dq_op and s_dquot.flags assignment
+		 * needs to be enforced, but other CPUs do not do rmb()
+		 * between s_dquot.flags and dq_op accesses.
+		 */
+		wmb(); synchronize_sched();
+		sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
+		__module_get(THIS_MODULE);
+		up(&sb->s_dquot.dqonoff_sem);
+	}
+	/* protected by vz_quota_sem */
+	__VZ_QUOTA_SBREF(sb)++;
+	return 0;
+}
+
+/**
+ * vzquota_put_super - release superblock when one quota tree goes away
+ *
+ * Called under vz_quota_sem.
+ */
+void vzquota_put_super(struct super_block *sb)
+{
+	int count;
+
+	count = --__VZ_QUOTA_SBREF(sb);
+	if (count == 0) {
+		down(&sb->s_dquot.dqonoff_sem);
+		sb->s_dquot.flags = 0;
+		wmb(); synchronize_sched();
+		sema_init(&sb->s_dquot.dqio_sem, 1);
+		sb->s_qcop = orig_dq_cop;
+		sb->dq_op = orig_dq_op;
+		inode_qmblk_lock(sb);
+		quota_gen_put(SB_QGEN(sb));
+		SB_QGEN(sb) = NULL;
+		/* release qlnk's without qmblk */
+		remove_inode_quota_links_list(&non_vzquota_inodes_lh,
+				sb, NULL);
+		/*
+		 * Races with quota initialization:
+		 * after this inode_qmblk_unlock all inode's generations are
+		 * invalidated, quota_inode_qmblk checks superblock operations.
+		 */
+		inode_qmblk_unlock(sb);
+		/*
+		 * Module refcounting: in theory, this is the best place
+		 * to call module_put(THIS_MODULE).
+		 * In reality, it can't be done because we can't be sure that
+		 * other CPUs do not enter our code segment through dq_op
+		 * cached long time ago.  Quotaops interface isn't supposed to
+		 * go into modules currently (that is, into unloadable
+		 * modules).  By omitting module_put, our module isn't
+		 * unloadable.
+		 */
+		up(&sb->s_dquot.dqonoff_sem);
+	}
+}
+
+#else
+
+struct vzquota_new_sop {
+	struct super_operations new_op;
+	struct super_operations *old_op;
+};
+
+/**
+ * vzquota_shutdown_super - callback on umount
+ */
+void vzquota_shutdown_super(struct super_block *sb)
+{
+	struct vz_quota_master *qmblk;
+	struct vzquota_new_sop *sop;
+
+	qmblk = __VZ_QUOTA_NOQUOTA(sb);	/* per-sb qmblk set by vzquota_get_super */
+	__VZ_QUOTA_NOQUOTA(sb) = NULL;
+	if (qmblk != NULL)
+		qmblk_put(qmblk);
+	sop = container_of(sb->s_op, struct vzquota_new_sop, new_op);
+	sb->s_op = sop->old_op;		/* undo the s_op wrapping */
+	kfree(sop);			/* s_op no longer points into sop */
+	if (sb->s_op->put_super != NULL)	/* chain to the saved put_super */
+		(*sb->s_op->put_super)(sb);
+}
+
+/**
+ * vzquota_get_super - account for a new quoted tree under the superblock
+ *
+ * One superblock can have multiple directory subtrees with different VZ
+ * quotas.
+ *
+ * Called under vz_quota_sem (from vzquota_on).
+ */
+int vzquota_get_super(struct super_block *sb)
+{
+	struct vz_quota_master *qnew;
+	struct vzquota_new_sop *sop;
+	int err;
+
+	down(&sb->s_dquot.dqonoff_sem);
+	err = -EEXIST;
+	if ((sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) &&
+	    sb->dq_op != &vz_quota_operations)
+		goto out_up;
+	err = -ENOMEM;	/* from here on only allocations can fail */
+	/*
+	 * This allocation code should be under sb->dq_op check below, but
+	 * it doesn't really matter...
+	 */
+	if (__VZ_QUOTA_NOQUOTA(sb) == NULL) {
+		qnew = vzquota_alloc_fake();
+		if (qnew == NULL)
+			goto out_up;
+		__VZ_QUOTA_NOQUOTA(sb) = qnew;
+	}
+	/* first VZ quota on this superblock: hook s_op, dq_op and s_qcop */
+	if (sb->dq_op != &vz_quota_operations) {
+		sop = kmalloc(sizeof(*sop), GFP_KERNEL);
+		if (sop == NULL) {
+			vzquota_free_master(__VZ_QUOTA_NOQUOTA(sb));
+			__VZ_QUOTA_NOQUOTA(sb) = NULL;
+			goto out_up;
+		}
+		memcpy(&sop->new_op, sb->s_op, sizeof(sop->new_op));
+		sop->new_op.put_super = &vzquota_shutdown_super;
+		sop->old_op = sb->s_op;
+		sb->s_op = &sop->new_op;
+
+		sb->dq_op = &vz_quota_operations;
+#ifdef CONFIG_VZ_QUOTA_UGID
+		sb->s_qcop = &vz_quotactl_operations;
+#else
+		sb->s_qcop = NULL;
+#endif
+		do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
+
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+		/* these 2 list heads are checked in sync_dquots() */
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		sb->s_dquot.info[USRQUOTA].dqi_format =
+						&vz_quota_empty_v2_format;
+		sb->s_dquot.info[GRPQUOTA].dqi_format =
+						&vz_quota_empty_v2_format;
+
+		/*
+		 * To get quotaops.h to call us we need to mark superblock
+		 * as having quota.  These flags mark the moment when
+		 * our dq_op start to be called.
+		 *
+		 * The ordering of dq_op and s_dquot.flags assignment
+		 * needs to be enforced, but other CPUs do not do rmb()
+		 * between s_dquot.flags and dq_op accesses.
+		 */
+		wmb(); synchronize_sched();
+		sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
+	}
+	err = 0;
+
+out_up:
+	up(&sb->s_dquot.dqonoff_sem);
+	return err;
+}
+
+/**
+ * vzquota_put_super - one quota tree less on this superblock
+ *
+ * Called under vz_quota_sem.
+ */
+void vzquota_put_super(struct super_block *sb)
+{
+	/*
+	 * Even if this put is the last one,
+	 * sb->s_dquot.flags can't be cleared, because otherwise vzquota_drop
+	 * won't be called and the remaining qmblk references won't be put.
+	 */
+}
+
+#endif
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Helpers for inode -> qmblk link maintenance
+ *
+ * --------------------------------------------------------------------- */
+
+#define __VZ_QUOTA_EMPTY		((void *)0xbdbdbdbd)
+#define VZ_QUOTA_IS_NOQUOTA(qm, sb)	((qm)->dq_flags & VZDQ_NOQUOT)
+#define VZ_QUOTA_EMPTY_IOPS		(&vfs_empty_iops)
+extern struct inode_operations vfs_empty_iops;
+
+static int VZ_QUOTA_IS_ACTUAL(struct inode *inode)
+{
+	struct vz_quota_master *qmblk = INODE_QLNK(inode)->qmblk;
+
+	/*
+	 * Markers first: BAD counts as actual, an empty link never does.
+	 */
+	if (qmblk == VZ_QUOTA_BAD)
+		return 1;
+	if (qmblk == __VZ_QUOTA_EMPTY)
+		return 0;
+	/* otherwise actual unless the qmblk was invalidated (VZDQ_NOACT) */
+	return !(qmblk->dq_flags & VZDQ_NOACT);
+}
+
+static inline int vzquota_qlnk_is_empty(struct vz_quota_ilink *qlnk)
+{
+	return qlnk->qmblk == __VZ_QUOTA_EMPTY;
+}
+
+static inline void set_qlnk_origin(struct vz_quota_ilink *qlnk,
+		unsigned char origin)
+{
+	qlnk->origin[0] = qlnk->origin[1];
+	qlnk->origin[1] = origin;
+}
+
+static inline void vzquota_qlnk_set_empty(struct vz_quota_ilink *qlnk)
+{
+	qlnk->qmblk = __VZ_QUOTA_EMPTY;
+	set_qlnk_origin(qlnk, VZ_QUOTAO_SETE);
+}
+
+void vzquota_qlnk_init(struct vz_quota_ilink *qlnk)
+{
+	memset(qlnk, 0, sizeof(*qlnk));
+	INIT_LIST_HEAD(&qlnk->list);
+	vzquota_qlnk_set_empty(qlnk);
+	set_qlnk_origin(qlnk, VZ_QUOTAO_INIT);
+}
+
+void vzquota_qlnk_destroy(struct vz_quota_ilink *qlnk)
+{
+	might_sleep();
+	if (vzquota_qlnk_is_empty(qlnk))
+		return;
+#if defined(CONFIG_VZ_QUOTA_UGID)
+	if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD) {
+		struct vz_quota_master *qmblk;
+		struct vz_quota_ugid *quid, *qgid;
+		qmblk = qlnk->qmblk;
+		quid = qlnk->qugid[USRQUOTA];
+		qgid = qlnk->qugid[GRPQUOTA];
+		if (quid != NULL || qgid != NULL) {
+			down(&qmblk->dq_sem);
+			if (qgid != NULL)
+				vzquota_put_ugid(qmblk, qgid);
+			if (quid != NULL)
+				vzquota_put_ugid(qmblk, quid);
+			up(&qmblk->dq_sem);
+		}
+	}
+#endif
+	if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD)
+		qmblk_put(qlnk->qmblk);
+	set_qlnk_origin(qlnk, VZ_QUOTAO_DESTR);
+}
+
+/**
+ * vzquota_qlnk_swap - swap inode's and temporary vz_quota_ilink contents
+ * @qlt: temporary
+ * @qli: inode's
+ *
+ * Locking is provided by the caller (depending on the context).
+ * After swap, @qli is inserted into the corresponding dq_ilink_list,
+ * @qlt list is reinitialized.
+ */
+static void vzquota_qlnk_swap(struct vz_quota_ilink *qlt,
+		struct vz_quota_ilink *qli)
+{
+	struct vz_quota_master *new_qmblk;
+	struct vz_quota_ugid *tmp;
+	int type;
+
+	for (type = 0; type < MAXQUOTAS; type++) {
+		tmp = qli->qugid[type];
+		qli->qugid[type] = qlt->qugid[type];
+		qlt->qugid[type] = tmp;
+	}
+	/* exchange the master pointers and re-home the inode's link */
+	new_qmblk = qlt->qmblk;
+	qlt->qmblk = qli->qmblk;
+	qli->qmblk = new_qmblk;
+	list_del_init(&qli->list);
+	if (new_qmblk != __VZ_QUOTA_EMPTY && new_qmblk != VZ_QUOTA_BAD)
+		list_add(&qli->list, &new_qmblk->dq_ilink_list);
+	INIT_LIST_HEAD(&qlt->list);
+	set_qlnk_origin(qli, VZ_QUOTAO_SWAP);
+}
+
+/**
+ * vzquota_qlnk_reinit_locked - destroy qlnk content, called under locks
+ *
+ * Called under dcache_lock and inode_qmblk locks.
+ * Returns 1 if locks were dropped inside, 0 if atomic.
+ */
+static int vzquota_qlnk_reinit_locked(struct vz_quota_ilink *qlnk,
+		struct inode *inode)
+{
+	if (vzquota_qlnk_is_empty(qlnk))
+		return 0;
+	if (qlnk->qmblk == VZ_QUOTA_BAD) {
+		vzquota_qlnk_set_empty(qlnk);
+		set_qlnk_origin(qlnk, VZ_QUOTAO_RE_LOCK);
+		return 0;
+	}
+	spin_unlock(&dcache_lock);
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(qlnk);
+	vzquota_qlnk_init(qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	spin_lock(&dcache_lock);
+	return 1;
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_qlnk_reinit_attr - destroy and reinit qlnk content
+ *
+ * Similar to vzquota_qlnk_reinit_locked, called under different locks.
+ */
+static int vzquota_qlnk_reinit_attr(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct vz_quota_master *qmblk)
+{
+	if (vzquota_qlnk_is_empty(qlnk))
+		return 0;
+	/* may be optimized if qlnk->qugid all NULLs */
+	qmblk_data_write_unlock(qmblk);
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(qlnk);
+	vzquota_qlnk_init(qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	qmblk_data_write_lock(qmblk);
+	return 1;
+}
+#endif
+
+/**
+ * vzquota_qlnk_fill - fill vz_quota_ilink content
+ * @qlnk: vz_quota_ilink to fill
+ * @inode: inode for which @qlnk is filled (i_sb, i_uid, i_gid)
+ * @qmblk: qmblk to which this @qlnk will belong
+ *
+ * Called under dcache_lock and inode_qmblk locks.
+ * Returns 1 if locks were dropped inside, 0 if atomic.
+ * @qlnk is expected to be empty.
+ */
+static int vzquota_qlnk_fill(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct vz_quota_master *qmblk)
+{
+	if (qmblk != VZ_QUOTA_BAD)	/* no reference is taken on BAD */
+		qmblk_get(qmblk);
+	qlnk->qmblk = qmblk;
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+	if (qmblk != VZ_QUOTA_BAD &&
+	    !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
+	    (qmblk->dq_flags & VZDQUG_ON)) {
+		struct vz_quota_ugid *quid, *qgid;
+
+		spin_unlock(&dcache_lock);
+		inode_qmblk_unlock(inode->i_sb);
+		/* dq_sem is taken only with both spinlocks dropped */
+		down(&qmblk->dq_sem);
+		quid = __vzquota_find_ugid(qmblk, inode->i_uid, USRQUOTA, 0);
+		qgid = __vzquota_find_ugid(qmblk, inode->i_gid, GRPQUOTA, 0);
+		up(&qmblk->dq_sem);
+		/* relock in the order inode_qmblk, then dcache_lock */
+		inode_qmblk_lock(inode->i_sb);
+		spin_lock(&dcache_lock);
+		qlnk->qugid[USRQUOTA] = quid;
+		qlnk->qugid[GRPQUOTA] = qgid;
+		return 1;	/* locks were dropped */
+	}
+#endif
+
+	return 0;	/* locks were held throughout */
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_qlnk_fill_attr - fill vz_quota_ilink content for uid, gid
+ *
+ * This function is a helper for vzquota_transfer, and differs from
+ * vzquota_qlnk_fill only by locking.
+ */
+static int vzquota_qlnk_fill_attr(struct vz_quota_ilink *qlnk,
+		struct inode *inode,
+		struct iattr *iattr,
+		int mask,
+		struct vz_quota_master *qmblk)
+{
+	qmblk_get(qmblk);
+	qlnk->qmblk = qmblk;
+
+	if (mask) {
+		struct vz_quota_ugid *quid, *qgid;
+
+		quid = qgid = NULL; /* to make gcc happy */
+		if (!(mask & (1 << USRQUOTA)))
+			quid = vzquota_get_ugid(INODE_QLNK(inode)->
+							qugid[USRQUOTA]);
+		if (!(mask & (1 << GRPQUOTA)))
+			qgid = vzquota_get_ugid(INODE_QLNK(inode)->
+							qugid[GRPQUOTA]);
+
+		qmblk_data_write_unlock(qmblk);
+		inode_qmblk_unlock(inode->i_sb);
+
+		down(&qmblk->dq_sem);
+		if (mask & (1 << USRQUOTA))
+			quid = __vzquota_find_ugid(qmblk, iattr->ia_uid,
+					USRQUOTA, 0);
+		if (mask & (1 << GRPQUOTA))
+			qgid = __vzquota_find_ugid(qmblk, iattr->ia_gid,
+					GRPQUOTA, 0);
+		up(&qmblk->dq_sem);
+
+		inode_qmblk_lock(inode->i_sb);
+		qmblk_data_write_lock(qmblk);
+		qlnk->qugid[USRQUOTA] = quid;
+		qlnk->qugid[GRPQUOTA] = qgid;
+		return 1;
+	}
+
+	return 0;
+}
+#endif
+
+/**
+ * __vzquota_inode_init - make sure inode's qlnk is initialized
+ *
+ * May be called if qlnk is already initialized, detects this situation itself.
+ * Called under inode_qmblk_lock.
+ */
+static void __vzquota_inode_init(struct inode *inode, unsigned char origin)
+{
+	if (inode->i_dquot[USRQUOTA] == NODQUOT) {
+		vzquota_qlnk_init(INODE_QLNK(inode));
+		inode->i_dquot[USRQUOTA] = (void *)~(unsigned long)NODQUOT;
+	}
+	set_qlnk_origin(INODE_QLNK(inode), origin);
+}
+
+/**
+ * vzquota_inode_drop - destroy VZ quota information in the inode
+ *
+ * Inode must not be externally accessible or dirty.
+ */
+static void vzquota_inode_drop(struct inode *inode)
+{
+	struct vz_quota_ilink qlnk;
+
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	vzquota_qlnk_swap(&qlnk, INODE_QLNK(inode));
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_DRCAL);
+	inode->i_dquot[USRQUOTA] = NODQUOT;
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);
+}
+
+/**
+ * vzquota_inode_qmblk_set - initialize inode's qlnk
+ * @inode: inode to be initialized
+ * @qmblk: quota master block to which this inode should belong (may be BAD)
+ * @qlnk: placeholder to store data to resolve locking issues
+ *
+ * Returns 1 if locks were dropped and rechecks possibly needed, 0 otherwise.
+ * Called under dcache_lock and inode_qmblk locks.
+ * @qlnk will be destroyed in the caller chain.
+ *
+ * It is not mandatory to restart parent checks since quota on/off currently
+ * shrinks dentry tree and checks that there are no outside references.
+ * But if at some time that shrink is removed, restarts will be required.
+ * Additionally, the restarts prevent inconsistencies if the dentry tree
+ * changes (inode is moved).  This is not a big deal, but anyway...
+ */
+static int vzquota_inode_qmblk_set(struct inode *inode,
+		struct vz_quota_master *qmblk,
+		struct vz_quota_ilink *qlnk)
+{
+	if (qmblk == NULL) {
+		printk(KERN_ERR "VZDQ: NULL in set, orig {%u, %u}, "
+				"dev %s, inode %lu, fs %s\n",
+				INODE_QLNK(inode)->origin[0],
+				INODE_QLNK(inode)->origin[1],
+				inode->i_sb->s_id, inode->i_ino,
+				inode->i_sb->s_type->name);
+		printk(KERN_ERR "current %d (%s), VE %d\n",
+				current->pid, current->comm,
+				VEID(get_exec_env()));
+		dump_stack();
+		qmblk = VZ_QUOTA_BAD;
+	}
+	while (1) {
+		if (vzquota_qlnk_is_empty(qlnk) &&
+		    vzquota_qlnk_fill(qlnk, inode, qmblk))
+			return 1;
+		if (qlnk->qmblk == qmblk)
+			break;
+		if (vzquota_qlnk_reinit_locked(qlnk, inode))
+			return 1;
+	}
+	vzquota_qlnk_swap(qlnk, INODE_QLNK(inode));
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_QSET);
+	return 0;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * vzquota_inode_qmblk (inode -> qmblk lookup) parts
+ *
+ * --------------------------------------------------------------------- */
+
+static int vzquota_dparents_check_attach(struct inode *inode)
+{
+	if (list_empty(&inode->i_dentry)) {
+		printk(KERN_ERR "VZDQ: no parent for "
+				"dev %s, inode %lu, fs %s\n",
+				inode->i_sb->s_id, inode->i_ino,
+				inode->i_sb->s_type->name);
+		return -1;
+	}
+	return 0;
+}
+
+static struct inode *vzquota_dparents_check_actual(struct inode *inode)
+{
+	struct dentry *de;
+
+	list_for_each_entry(de, &inode->i_dentry, d_alias) {
+		if (de->d_parent == de) /* detached dentry, perhaps */
+			continue;
+		/* first access to parent, make sure its qlnk initialized */
+		__vzquota_inode_init(de->d_parent->d_inode, VZ_QUOTAO_ACT);
+		if (!VZ_QUOTA_IS_ACTUAL(de->d_parent->d_inode))
+			return de->d_parent->d_inode;
+	}
+	return NULL;
+}
+
+static struct vz_quota_master *vzquota_dparents_check_same(struct inode *inode)
+{
+	struct dentry *de;
+	struct vz_quota_master *qmblk;
+
+	qmblk = NULL;
+	list_for_each_entry(de, &inode->i_dentry, d_alias) {
+		if (de->d_parent == de) /* detached dentry, perhaps */
+			continue;
+		if (qmblk == NULL) {
+			qmblk = INODE_QLNK(de->d_parent->d_inode)->qmblk;
+			continue;
+		}
+		if (INODE_QLNK(de->d_parent->d_inode)->qmblk != qmblk) {
+			printk(KERN_WARNING "VZDQ: multiple quotas for "
+					"dev %s, inode %lu, fs %s\n",
+					inode->i_sb->s_id,
+					inode->i_ino,
+					inode->i_sb->s_type->name);
+			qmblk = VZ_QUOTA_BAD;
+			break;
+		}
+	}
+	if (qmblk == NULL) {
+		printk(KERN_WARNING "VZDQ: not attached to tree, "
+				"dev %s, inode %lu, fs %s\n",
+				inode->i_sb->s_id,
+				inode->i_ino,
+				inode->i_sb->s_type->name);
+		qmblk = VZ_QUOTA_BAD;
+	}
+	return qmblk;
+}
+
+static void vzquota_dbranch_actualize(struct inode *inode,
+		struct inode *refinode)
+{
+	struct inode *pinode;
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk;
+
+	vzquota_qlnk_init(&qlnk);
+
+start:
+	if (inode == inode->i_sb->s_root->d_inode) {
+		/* filesystem root */
+		atomic_inc(&inode->i_count);
+		do {
+			qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
+		} while (vzquota_inode_qmblk_set(inode, qmblk, &qlnk));
+		goto out;
+	}
+
+	if (!vzquota_dparents_check_attach(inode)) {
+		pinode = vzquota_dparents_check_actual(inode);
+		if (pinode != NULL) {
+			inode = pinode;
+			goto start;
+		}
+	}
+
+	atomic_inc(&inode->i_count);
+	while (1) {
+		if (VZ_QUOTA_IS_ACTUAL(inode)) /* actualized without us */
+			break;
+		/*
+		 * Need to check parents again if we have slept inside
+		 * vzquota_inode_qmblk_set() in the loop.
+		 * If the state of parents is different, just return and repeat
+		 * the actualizing process again from the inode passed to
+		 * vzquota_inode_qmblk_recalc().
+		 */
+		if (!vzquota_dparents_check_attach(inode)) {
+			if (vzquota_dparents_check_actual(inode) != NULL)
+				break;
+			qmblk = vzquota_dparents_check_same(inode);
+		} else
+			qmblk = VZ_QUOTA_BAD;
+		if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk)){/* success */
+			set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_ACT);
+			break;
+		}
+	}
+
+out:
+	spin_unlock(&dcache_lock);
+	inode_qmblk_unlock(refinode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);
+	iput(inode);
+	inode_qmblk_lock(refinode->i_sb);
+	spin_lock(&dcache_lock);
+}
+
+static void vzquota_dtree_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{
+	struct inode *pinode;
+	struct vz_quota_master *qmblk;
+
+	if (inode == inode->i_sb->s_root->d_inode) {
+		/* filesystem root */
+		do {
+			qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
+		} while (vzquota_inode_qmblk_set(inode, qmblk, qlnk));
+		return;
+	}
+
+start:
+	if (VZ_QUOTA_IS_ACTUAL(inode))
+		return;
+	/*
+	 * Here qmblk is (re-)initialized for all ancestors.
+	 * This is not a very efficient procedure, but it guarantees that
+	 * the quota tree is consistent (that is, the inode doesn't have two
+	 * ancestors with different qmblk).
+	 */
+	if (!vzquota_dparents_check_attach(inode)) {
+		pinode = vzquota_dparents_check_actual(inode);
+		if (pinode != NULL) {
+			vzquota_dbranch_actualize(pinode, inode);
+			goto start;
+		}
+		qmblk = vzquota_dparents_check_same(inode);
+	} else
+		qmblk = VZ_QUOTA_BAD;
+
+	if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
+		goto start;
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_DTREE);
+}
+
+/* Choose a qmblk for an inode that has no dentry, e.g. one still being
+ * created.  Called from vzquota_inode_qmblk_recalc() under dcache_lock. */
+static void vzquota_det_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{
+	struct inode *parent = NULL;
+	struct vz_quota_master *qmblk;
+	char *msg;
+	int cnt = 0;
+	time_t timeout;
+
+start:
+	/*
+	 * qmblk of detached inodes shouldn't be considered as not actual.
+	 * They are not in any dentry tree, so quota on/off shouldn't affect
+	 * them.
+	 */
+	if (!vzquota_qlnk_is_empty(INODE_QLNK(inode)))
+		return;
+
+	timeout = 3;
+	qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
+	/*
+	 * Scenario:
+	 *	open
+	 *	unlink
+	 *	quotaon
+	 *	generic_delete_inode
+	 *
+	 * This is the first time vzquota sees inode. inode is outside of
+	 * vzquota area of interest, otherwise quotaon would have got -EBUSY
+	 * due to shrink_dcache_parent().
+	 * inode is almost completely destroyed, so don't intervene.
+	 *
+	 * dev@:
+	 * However, there is a small race here...
+	 * dput() first removes itself from all the lists,
+	 * so shrink_dcache_parent() can succeed while dentry_iput is not
+	 * done yet.
+	 */
+	if (inode->i_state & I_FREEING)
+		goto set;
+
+	msg = "detached inode not in creation";
+	if (inode->i_op != VZ_QUOTA_EMPTY_IOPS)
+		goto fail;
+	qmblk = VZ_QUOTA_BAD;
+	msg = "unexpected creation context";
+	if (!vzquota_cur_qmblk_check())
+		goto fail;
+	timeout = 0;
+	parent = vzquota_cur_qmblk_fetch();
+	msg = "uninitialized parent";
+	if (vzquota_qlnk_is_empty(INODE_QLNK(parent)))
+		goto fail;
+	msg = "parent not in tree";
+	if (list_empty(&parent->i_dentry))
+		goto fail;
+	msg = "parent has 0 refcount";
+	if (!atomic_read(&parent->i_count))
+		goto fail;
+	msg = "parent has different sb";
+	if (parent->i_sb != inode->i_sb)
+		goto fail;
+	if (!VZ_QUOTA_IS_ACTUAL(parent)) {
+		vzquota_dbranch_actualize(parent, inode);
+		goto start;
+	}
+
+	qmblk = INODE_QLNK(parent)->qmblk;
+set:
+	if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
+		goto start;
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_DET);
+	return;
+
+fail:
+	{
+		struct timeval tv, tvo;
+		do_gettimeofday(&tv);
+		memcpy(&tvo, __VZ_QUOTA_TSTAMP(inode->i_sb), sizeof(tvo));
+		tv.tv_sec -= tvo.tv_sec;
+		if (tv.tv_usec < tvo.tv_usec) {
+			tv.tv_sec--;
+			tv.tv_usec += USEC_PER_SEC - tvo.tv_usec;
+		} else
+			tv.tv_usec -= tvo.tv_usec;
+		if (tv.tv_sec < timeout)	/* within the grace period: no report */
+			goto set;
+		printk(KERN_ERR "VZDQ: %s, orig {%u, %u},"
+			" dev %s, inode %lu, fs %s\n",
+			msg,
+			INODE_QLNK(inode)->origin[0],
+			INODE_QLNK(inode)->origin[1],
+			inode->i_sb->s_id, inode->i_ino,
+			inode->i_sb->s_type->name);
+		printk(KERN_ERR "i_count %u, i_mode %o, "
+				"i_state %lx, i_flags %x\n",
+				atomic_read(&inode->i_count), inode->i_mode,
+				inode->i_state, inode->i_flags);
+		printk(KERN_ERR "i_op %p, vfs_empty_iops %p, "
+				"i_fop %p, i_mapping %p\n",
+				inode->i_op, &vfs_empty_iops,
+				inode->i_fop, inode->i_mapping);
+		if (!cnt++) {
+			printk(KERN_ERR "current %d (%s), VE %d,"
+				" time %ld.%06ld\n",
+				current->pid, current->comm,
+				VEID(get_exec_env()),
+				tv.tv_sec, (long)tv.tv_usec);
+			dump_stack();
+		}
+		if (parent != NULL)
+			printk(KERN_ERR "VZDQ: parent of %lu is %lu\n",
+				inode->i_ino, parent->i_ino);
+	}
+	goto set;
+}
+
+static void vzquota_inode_qmblk_recalc(struct inode *inode,
+		struct vz_quota_ilink *qlnk)
+{
+	spin_lock(&dcache_lock);
+	if (list_empty(&inode->i_dentry))
+		vzquota_det_qmblk_recalc(inode, qlnk);
+	else
+		vzquota_dtree_qmblk_recalc(inode, qlnk);
+	spin_unlock(&dcache_lock);
+}
+
+/**
+ * vzquota_inode_qmblk - obtain inode's qmblk
+ *
+ * Returns qmblk with refcounter taken, %NULL if not under
+ * VZ quota or %VZ_QUOTA_BAD.
+ *
+ * FIXME: This function should be removed when vzquota_find_qmblk /
+ * get_quota_root / vzquota_dstat code is cleaned up.
+ */
+struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk;
+
+	might_sleep();
+
+	if (inode->i_sb->dq_op != &vz_quota_operations)
+		return NULL;
+#if defined(VZ_QUOTA_UNLOAD)
+#error Make sure qmblk does not disappear
+#endif
+
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+	    !VZ_QUOTA_IS_ACTUAL(inode))
+		vzquota_inode_qmblk_recalc(inode, &qlnk);
+
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != VZ_QUOTA_BAD) {
+		if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb))
+			qmblk_get(qmblk);
+		else
+			qmblk = NULL;
+	}
+
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk);
+	return qmblk;
+}
+
+/**
+ * vzquota_find_qmblk - helper to emulate quota on virtual filesystems
+ *
+ * This function finds a quota master block corresponding to the root of
+ * a virtual filesystem.
+ * Returns a quota master block with reference taken, or %NULL if not under
+ * quota, or %VZ_QUOTA_BAD if quota inconsistency is found (and all allocation
+ * operations will fail).
+ *
+ * Note: this function uses vzquota_inode_qmblk().
+ * The latter is a rather confusing function: it returns qmblk that used to be
+ * on the inode some time ago (without guarantee that it still has any
+ * relations to the inode).  So, vzquota_find_qmblk() leaves it up to the
+ * caller to think whether the inode could have changed its qmblk and what to
+ * do in that case.
+ * Currently, the callers appear to not care :(
+ */
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *sb)
+{
+	struct inode *root;
+
+	/* no get_quota_root hook: nothing to look up */
+	if (sb->s_op->get_quota_root == NULL)
+		return NULL;
+
+	root = sb->s_op->get_quota_root(sb);
+	if (root == NULL)
+		return NULL;
+	return vzquota_inode_qmblk(root);
+}
+
+/* ----------------------------------------------------------------------
+ *
+ * Calls from quota operations
+ *
+ * --------------------------------------------------------------------- */
+
+/**
+ * vzquota_inode_init_call - call from DQUOT_INIT
+ */
+void vzquota_inode_init_call(struct inode *inode)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+
+	/* initializes inode's quota inside */
+	qmblk = vzquota_inode_data(inode, &data);
+	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+		vzquota_data_unlock(inode, &data);
+
+	/*
+	 * The check is needed for repeated new_inode() calls from a single
+	 * ext3 call like create or mkdir in case of -ENOSPC.
+	 */
+	spin_lock(&dcache_lock);
+	if (!list_empty(&inode->i_dentry))
+		vzquota_cur_qmblk_set(inode);
+	spin_unlock(&dcache_lock);
+}
+
+/**
+ * vzquota_inode_drop_call - call from DQUOT_DROP
+ */
+void vzquota_inode_drop_call(struct inode *inode)
+{
+	vzquota_inode_drop(inode);
+}
+
+/**
+ * vzquota_inode_data - initialize (if nec.) and lock inode quota ptrs
+ * @inode: the inode
+ * @data: storage space
+ *
+ * Returns: qmblk is NULL or VZ_QUOTA_BAD or actualized qmblk.
+ * On return if qmblk is neither NULL nor VZ_QUOTA_BAD:
+ *   qmblk in inode's qlnk is the same as returned,
+ *   ugid pointers inside inode's qlnk are valid,
+ *   some locks are taken (and should be released by vzquota_data_unlock).
+ * If qmblk is NULL or VZ_QUOTA_BAD, locks are NOT taken.
+ */
+struct vz_quota_master *vzquota_inode_data(struct inode *inode,
+		struct vz_quota_datast *data)
+{
+	struct vz_quota_master *qmblk;
+
+	might_sleep();
+
+	vzquota_qlnk_init(&data->qlnk);
+	inode_qmblk_lock(inode->i_sb);
+	if (unlikely(inode->i_flags & S_NOQUOTA)) {
+		inode_qmblk_unlock(inode->i_sb);
+		return NULL;
+	}
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+	    !VZ_QUOTA_IS_ACTUAL(inode))
+		vzquota_inode_qmblk_recalc(inode, &data->qlnk);
+
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != VZ_QUOTA_BAD) {
+		if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb)) {
+			/*
+			 * Note that in the current implementation,
+			 * inode_qmblk_lock can theoretically be dropped here.
+			 * This place is serialized with quota_off because
+			 * quota_off fails when there are extra dentry
+			 * references and syncs inodes before removing quota
+			 * information from them.
+			 * However, quota usage information should stop being
+			 * updated immediately after vzquota_off.
+			 */
+			qmblk_data_write_lock(qmblk);
+		} else {
+			inode_qmblk_unlock(inode->i_sb);
+			qmblk = NULL;
+		}
+	} else {
+		inode_qmblk_unlock(inode->i_sb);
+	}
+	return qmblk;
+}
+
+void vzquota_data_unlock(struct inode *inode,
+		struct vz_quota_datast *data)
+{
+	qmblk_data_write_unlock(INODE_QLNK(inode)->qmblk);
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&data->qlnk);
+}
+
+#if defined(CONFIG_VZ_QUOTA_UGID)
+/**
+ * vzquota_inode_transfer_call - call from vzquota_transfer
+ */
+int vzquota_inode_transfer_call(struct inode *inode, struct iattr *iattr)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_datast data;
+	struct vz_quota_ilink qlnew;
+	int mask;
+	int ret;
+
+	might_sleep();
+	vzquota_qlnk_init(&qlnew);
+start:
+	qmblk = vzquota_inode_data(inode, &data);
+	ret = NO_QUOTA;
+	if (qmblk == VZ_QUOTA_BAD)
+		goto out_destr;
+	ret = QUOTA_OK;
+	if (qmblk == NULL)
+		goto out_destr;
+	qmblk_get(qmblk);
+
+	ret = QUOTA_OK;
+	if (!(qmblk->dq_flags & VZDQUG_ON))
+		/* no ugid quotas */
+		goto out_unlock;
+
+	mask = 0;
+	if ((iattr->ia_valid & ATTR_UID) && iattr->ia_uid != inode->i_uid)
+		mask |= 1 << USRQUOTA;
+	if ((iattr->ia_valid & ATTR_GID) && iattr->ia_gid != inode->i_gid)
+		mask |= 1 << GRPQUOTA;
+	while (1) {
+		if (vzquota_qlnk_is_empty(&qlnew) &&
+		    vzquota_qlnk_fill_attr(&qlnew, inode, iattr, mask, qmblk))
+			break;
+		if (qlnew.qmblk == INODE_QLNK(inode)->qmblk &&
+		    qlnew.qmblk == qmblk)
+			goto finish;
+		if (vzquota_qlnk_reinit_attr(&qlnew, inode, qmblk))
+			break;
+	}
+
+	/* prepare for restart */
+	vzquota_data_unlock(inode, &data);
+	qmblk_put(qmblk);
+	goto start;
+
+finish:
+	/* all references obtained successfully */
+	ret = vzquota_transfer_usage(inode, mask, &qlnew);
+	if (!ret) {
+		vzquota_qlnk_swap(&qlnew, INODE_QLNK(inode));
+		set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_TRANS);
+	}
+out_unlock:
+	vzquota_data_unlock(inode, &data);
+	qmblk_put(qmblk);
+out_destr:
+	vzquota_qlnk_destroy(&qlnew);
+	return ret;
+}
+#endif
+
+int vzquota_rename_check(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)
+{
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ilink qlnk1, qlnk2;
+	int c, ret;
+
+	if (inode->i_sb != old_dir->i_sb || inode->i_sb != new_dir->i_sb)
+		return -1;
+
+	might_sleep();
+
+	vzquota_qlnk_init(&qlnk1);
+	vzquota_qlnk_init(&qlnk2);
+	inode_qmblk_lock(inode->i_sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+	__vzquota_inode_init(old_dir, VZ_QUOTAO_INICAL);
+	__vzquota_inode_init(new_dir, VZ_QUOTAO_INICAL);
+
+	do {
+		c = 0;
+		if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
+		    !VZ_QUOTA_IS_ACTUAL(inode)) {
+			vzquota_inode_qmblk_recalc(inode, &qlnk1);
+			c++;
+		}
+		if (vzquota_qlnk_is_empty(INODE_QLNK(new_dir)) ||
+		    !VZ_QUOTA_IS_ACTUAL(new_dir)) {
+			vzquota_inode_qmblk_recalc(new_dir, &qlnk2);
+			c++;
+		}
+	} while (c);
+
+	ret = 0;
+	qmblk = INODE_QLNK(inode)->qmblk;
+	if (qmblk != INODE_QLNK(new_dir)->qmblk) {
+		ret = -1;
+		if (qmblk != VZ_QUOTA_BAD &&
+		    !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
+		    qmblk->dq_root_dentry->d_inode == inode &&
+		    VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(new_dir)->qmblk,
+			    				inode->i_sb) &&
+		    VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(old_dir)->qmblk,
+			    				inode->i_sb))
+			/* quota root rename is allowed */
+			ret = 0;
+	}
+
+	inode_qmblk_unlock(inode->i_sb);
+	vzquota_qlnk_destroy(&qlnk2);
+	vzquota_qlnk_destroy(&qlnk1);
+	return ret;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * qmblk-related parts of on/off operations
+ *
+ * --------------------------------------------------------------------- */
+
+/**
+ * vzquota_check_dtree - check dentry tree if quota on/off is allowed
+ * @qmblk: quota master block whose dq_root_dentry is examined
+ * @off: non-zero when turning quota off; only then is an unhashed root accepted
+ *
+ * Returns 0 if allowed, -EBUSY otherwise.  Refusing while dentries in the
+ * tree have external references also keeps reported usage accurate (the
+ * tree is not being modified); as a side effect, bind mounts of tree nodes
+ * are not allowed at on/off time.  Callers hold the inode_qmblk lock and
+ * dcache_lock; both are dropped and re-taken around shrink_dcache_parent().
+ */
+int vzquota_check_dtree(struct vz_quota_master *qmblk, int off)
+{
+	struct dentry *dentry;
+	int err, count;
+
+	err = -EBUSY;
+	dentry = qmblk->dq_root_dentry;
+
+	if (d_unhashed(dentry) && dentry != dentry->d_sb->s_root)
+		goto unhashed;
+
+	/* attempt to shrink (both locks are dropped meanwhile) */
+  	if (!list_empty(&dentry->d_subdirs)) {
+		spin_unlock(&dcache_lock);
+		inode_qmblk_unlock(dentry->d_sb);
+		shrink_dcache_parent(dentry);
+		inode_qmblk_lock(dentry->d_sb);
+		spin_lock(&dcache_lock);
+		if (!list_empty(&dentry->d_subdirs))
+			goto out;
+
+		count = 1;
+		if (dentry == dentry->d_sb->s_root)
+			count += 2;	/* sb and mnt refs */
+		if (atomic_read(&dentry->d_count) < count) {
+			printk(KERN_ERR "%s: too small count %d vs %d.\n",
+					__FUNCTION__,
+					atomic_read(&dentry->d_count), count);
+			goto out;
+		}
+		if (atomic_read(&dentry->d_count) > count)
+			goto out;
+	}
+
+	err = 0;
+out:
+	return err;
+
+unhashed:
+	/*
+	 * Quota root is removed.
+	 * Allow to turn quota off, but not on.
+	 */
+	if (off)
+		err = 0;
+	goto out;
+}
+
+/* Turn quota on: bind @qmblk to @inode.  Returns 0 or a negative errno. */
+int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
+		struct vz_quota_master *qmblk)
+{
+	struct vz_quota_ilink qlnk;
+	struct vz_quota_master *qold, *qnew;
+	int err;
+
+	might_sleep();
+
+	qnew = vzquota_alloc_fake();
+	if (qnew == NULL)
+		return -ENOMEM;
+	/* unless installed below, qnew itself is what gets put at exit */
+	qold = qnew;
+
+	vzquota_qlnk_init(&qlnk);
+	inode_qmblk_lock(sb);
+	__vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
+
+	spin_lock(&dcache_lock);
+	while (1) {
+		err = vzquota_check_dtree(qmblk, 0);
+		if (err)
+			break;
+		if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk))
+			break;
+	}
+	set_qlnk_origin(INODE_QLNK(inode), VZ_QUOTAO_ON);
+	spin_unlock(&dcache_lock);
+
+	if (!err) {
+		qold = __VZ_QUOTA_NOQUOTA(sb);
+		qold->dq_flags |= VZDQ_NOACT;
+		__VZ_QUOTA_NOQUOTA(sb) = qnew;
+	}
+
+	inode_qmblk_unlock(sb);
+	vzquota_qlnk_destroy(&qlnk);
+	qmblk_put(qold);
+	return err;
+}
+
+int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk)
+{
+	int busy;
+
+	inode_qmblk_lock(sb);
+
+	/* the dentry tree check runs with both locks held */
+	spin_lock(&dcache_lock);
+	busy = vzquota_check_dtree(qmblk, 1);
+	spin_unlock(&dcache_lock);
+
+	/* flag the qmblk only when turning quota off is allowed */
+	if (!busy)
+		qmblk->dq_flags |= VZDQ_NOACT | VZDQ_NOQUOT;
+	inode_qmblk_unlock(sb);
+	return busy ? -EBUSY : 0;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * External interfaces
+ *
+ * ---------------------------------------------------------------------*/
+
+static int vzquota_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	switch (cmd) {
+	case VZCTL_QUOTA_NEW_CTL: {
+		struct vzctl_quotactl qb;
+
+		err = -EFAULT;
+		if (copy_from_user(&qb, (void __user *)arg, sizeof(qb)))
+			break;
+		err = do_vzquotactl(qb.cmd, qb.quota_id,
+				qb.qstat, qb.ve_root, 0);
+		break;
+	}
+#ifdef CONFIG_VZ_QUOTA_UGID
+	case VZCTL_QUOTA_UGID_CTL: {
+		struct vzctl_quotaugidctl qub;
+
+		err = -EFAULT;
+		if (copy_from_user(&qub, (void __user *)arg, sizeof(qub)))
+			break;
+		err = do_vzquotaugidctl(qub.cmd, qub.quota_id,
+				qub.ugid_index, qub.ugid_size, qub.addr, 0);
+		break;
+	}
+#endif
+	default:
+		err = -ENOTTY;
+	}
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+/* Compat ioctl: copy in the compat request, convert pointers with compat_ptr */
+static int compat_vzquota_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	switch (cmd) {
+	case VZCTL_COMPAT_QUOTA_CTL: {
+		struct compat_vzctl_quotactl cs;
+
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void __user *)arg, sizeof(cs)))
+			break;
+		err = do_vzquotactl(cs.cmd, cs.quota_id,
+				compat_ptr(cs.qstat),
+				compat_ptr(cs.ve_root), 1);
+		break;
+	}
+#ifdef CONFIG_VZ_QUOTA_UGID
+	case VZCTL_COMPAT_QUOTA_UGID_CTL: {
+		struct compat_vzctl_quotaugidctl cs;
+
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void __user *)arg, sizeof(cs)))
+			break;
+		err = do_vzquotaugidctl(cs.cmd, cs.quota_id, cs.ugid_index,
+				cs.ugid_size, compat_ptr(cs.addr), 1);
+		break;
+	}
+#endif
+	default:
+		err = -ENOIOCTLCMD;
+	}
+	return err;
+}
+#endif
+
+static struct vzioctlinfo vzdqcalls = {
+	.type		= VZDQCTLTYPE,
+	.ioctl		= vzquota_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= compat_vzquota_ioctl,
+#endif
+	.owner		= THIS_MODULE,
+};
+
+/**
+ * vzquota_dstat - get quota usage info for virtual superblock
+ */
+static int vzquota_dstat(struct super_block *super, struct dq_stat *qstat)
+{
+	struct vz_quota_master *qmblk;
+
+	qmblk = vzquota_find_qmblk(super);
+	if (qmblk == NULL)
+		return -ENOENT;
+	if (qmblk == VZ_QUOTA_BAD) {
+		memset(qstat, 0, sizeof(*qstat));
+		return 0;
+	}
+
+	qmblk_data_read_lock(qmblk);
+	memcpy(qstat, &qmblk->dq_stat, sizeof(*qstat));
+	qmblk_data_read_unlock(qmblk);
+	qmblk_put(qmblk);
+	return 0;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Init/exit helpers
+ *
+ * ---------------------------------------------------------------------*/
+
+static int vzquota_cache_init(void)
+{
+	int bucket;
+
+	/* slab cache sized for struct vz_quota_master objects */
+	vzquota_cachep = kmem_cache_create("vz_quota_master",
+					 sizeof(struct vz_quota_master),
+					 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (vzquota_cachep == NULL) {
+		printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
+		return -ENOMEM;
+	}
+
+	/* start with every hash chain empty */
+	bucket = VZ_QUOTA_HASH_SIZE;
+	while (bucket-- > 0)
+		INIT_LIST_HEAD(&vzquota_hash_table[bucket]);
+	return 0;
+}
+
+static void vzquota_cache_release(void)
+{
+	int i;
+
+	/* sanity check */
+	for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
+		if (!list_empty(&vzquota_hash_table[i]))
+			BUG();
+
+	/* release caches */
+	if (kmem_cache_destroy(vzquota_cachep))
+		printk(KERN_ERR
+			"VZQUOTA: vz_quota_master kmem_cache_destroy failed\n");
+	vzquota_cachep = NULL;
+}
+
+static int quota_notifier_call(struct vnotifier_block *self,
+		unsigned long n, void *data, int err)
+{
+	struct virt_info_quota *viq;
+	struct super_block *sb;
+
+	viq = (struct virt_info_quota *)data;
+	switch (n) {
+	case VIRTINFO_QUOTA_ON:
+		err = NOTIFY_BAD;
+		if (!try_module_get(THIS_MODULE))
+			break;
+		sb = viq->super;
+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
+		err = NOTIFY_OK;
+		break;
+	case VIRTINFO_QUOTA_OFF:
+		module_put(THIS_MODULE);
+		err = NOTIFY_OK;
+		break;
+	case VIRTINFO_QUOTA_GETSTAT:
+		err = NOTIFY_BAD;
+		if (vzquota_dstat(viq->super, viq->qstat))
+			break;
+		err = NOTIFY_OK;
+		break;
+	case VIRTINFO_QUOTA_DISABLE:
+		err = NOTIFY_OK;
+		vzquota_inode_off((struct inode *)data);
+		break;
+	}
+	return err;
+}
+
+struct vnotifier_block quota_notifier_block = {
+	.notifier_call = quota_notifier_call,
+	.priority = INT_MAX,
+};
+
+/* ----------------------------------------------------------------------
+ *
+ * Init/exit procedures
+ *
+ * ---------------------------------------------------------------------*/
+
+static int __init vzquota_init(void)
+{
+	int err;
+
+	if ((err = vzquota_cache_init()) != 0)
+		goto out_cache;
+
+	if ((err = vzquota_proc_init()) != 0)
+		goto out_proc;
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+	if ((err = vzquota_ugid_init()) != 0)
+		goto out_ugid;
+#endif
+
+	init_MUTEX(&vz_quota_sem);
+	vzioctl_register(&vzdqcalls);
+	virtinfo_notifier_register(VITYPE_QUOTA, &quota_notifier_block);
+#if defined(CONFIG_VZ_QUOTA_UGID) && defined(CONFIG_PROC_FS)
+	vzaquota_init();
+#endif
+
+	return 0;
+
+#ifdef CONFIG_VZ_QUOTA_UGID
+out_ugid:
+	vzquota_proc_release();
+#endif
+out_proc:
+	vzquota_cache_release();
+out_cache:
+	return err;
+}
+
+#if defined(VZ_QUOTA_UNLOAD)
+static void __exit vzquota_release(void)
+{
+	virtinfo_notifier_unregister(VITYPE_QUOTA, &quota_notifier_block);
+	vzioctl_unregister(&vzdqcalls);
+#ifdef CONFIG_VZ_QUOTA_UGID
+#ifdef CONFIG_PROC_FS
+	vzaquota_fini();
+#endif
+	vzquota_ugid_release();
+#endif
+	vzquota_proc_release();
+	vzquota_cache_release();
+}
+#endif
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Disk Quota");
+MODULE_LICENSE("GPL v2");
+
+module_init(vzquota_init)
+#if defined(VZ_QUOTA_UNLOAD)
+module_exit(vzquota_release)
+#endif
diff -upr linux-2.6.16.46-0.12.orig/fs/xattr.c linux-2.6.16.46-0.12-027test011/fs/xattr.c
--- linux-2.6.16.46-0.12.orig/fs/xattr.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/fs/xattr.c	2007-08-28 17:35:36.000000000 +0400
@@ -97,7 +97,7 @@ out:
 	mutex_unlock(&inode->i_mutex);
 	return error;
 }
-EXPORT_SYMBOL_GPL(vfs_setxattr);
+EXPORT_SYMBOL(vfs_setxattr);
 
 ssize_t
 vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
@@ -133,7 +133,7 @@ vfs_getxattr(struct dentry *dentry, char
 
 	return error;
 }
-EXPORT_SYMBOL_GPL(vfs_getxattr);
+EXPORT_SYMBOL(vfs_getxattr);
 
 int
 vfs_removexattr(struct dentry *dentry, char *name)
@@ -160,7 +160,7 @@ vfs_removexattr(struct dentry *dentry, c
 		fsnotify_xattr(dentry);
 	return error;
 }
-EXPORT_SYMBOL_GPL(vfs_removexattr);
+EXPORT_SYMBOL(vfs_removexattr);
 
 
 /*
diff -upr linux-2.6.16.46-0.12.orig/include/asm-arm26/tlbflush.h linux-2.6.16.46-0.12-027test011/include/asm-arm26/tlbflush.h
--- linux-2.6.16.46-0.12.orig/include/asm-arm26/tlbflush.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-arm26/tlbflush.h	2007-08-28 17:35:31.000000000 +0400
@@ -25,7 +25,7 @@ static inline void memc_update_all(void)
 {
 	struct task_struct *p;
 	cpu_memc_update_all(init_mm.pgd);
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (!p->mm)
 			continue;
 		cpu_memc_update_all(p->mm->pgd);
diff -upr linux-2.6.16.46-0.12.orig/include/asm-i386/bug.h linux-2.6.16.46-0.12-027test011/include/asm-i386/bug.h
--- linux-2.6.16.46-0.12.orig/include/asm-i386/bug.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-i386/bug.h	2007-08-28 17:35:29.000000000 +0400
@@ -14,7 +14,10 @@
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 #define BUG()				\
  __asm__ __volatile__(	"ud2\n"		\
+			"\t.byte 0x66\n"\
+			"\t.byte 0xb8\n" /* mov $xxx, %ax */\
 			"\t.word %c0\n"	\
+			"\t.byte 0xb8\n" /* mov $xxx, %eax */\
 			"\t.long %c1\n"	\
 			 : : "i" (__LINE__), "i" (__FILE__))
 #else
diff -upr linux-2.6.16.46-0.12.orig/include/asm-i386/elf.h linux-2.6.16.46-0.12-027test011/include/asm-i386/elf.h
--- linux-2.6.16.46-0.12.orig/include/asm-i386/elf.h	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/asm-i386/elf.h	2007-08-28 17:35:33.000000000 +0400
@@ -108,7 +108,7 @@ typedef struct user_fxsr_struct elf_fpxr
    For the moment, we have only optimizations for the Intel generations,
    but that could change... */
 
-#define ELF_PLATFORM  (system_utsname.machine)
+#define ELF_PLATFORM  (ve_utsname.machine)
 
 #ifdef __KERNEL__
 #define SET_PERSONALITY(ex, ibcs2) do { } while (0)
@@ -147,8 +147,10 @@ extern int arch_setup_additional_pages(s
 
 #define ARCH_DLINFO						\
 do {								\
+	if (sysctl_at_vsyscall) {				\
 		NEW_AUX_ENT(AT_SYSINFO,	VSYSCALL_ENTRY);	\
 		NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE);	\
+	}							\
 } while (0)
 
 /*
diff -upr linux-2.6.16.46-0.12.orig/include/asm-i386/mman.h linux-2.6.16.46-0.12-027test011/include/asm-i386/mman.h
--- linux-2.6.16.46-0.12.orig/include/asm-i386/mman.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-i386/mman.h	2007-08-28 17:35:30.000000000 +0400
@@ -10,6 +10,7 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO	0x20000		/* do soft ubc charge */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff -upr linux-2.6.16.46-0.12.orig/include/asm-i386/msr.h linux-2.6.16.46-0.12-027test011/include/asm-i386/msr.h
--- linux-2.6.16.46-0.12.orig/include/asm-i386/msr.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-i386/msr.h	2007-08-28 17:35:30.000000000 +0400
@@ -78,6 +78,21 @@ static inline void wrmsrl (unsigned long
 			  : "=a" (low), "=d" (high) \
 			  : "c" (counter))
 
+#ifdef CONFIG_SMP
+void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
+void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
+#else
+static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+	rdmsr(msr_no, *l, *h);
+}
+
+static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	wrmsr(msr_no, l, h);
+}
+#endif
+
 /* symbolic names for some interesting MSRs */
 /* Intel defined MSRs. */
 #define MSR_IA32_P5_MC_ADDR		0
diff -upr linux-2.6.16.46-0.12.orig/include/asm-i386/nmi.h linux-2.6.16.46-0.12-027test011/include/asm-i386/nmi.h
--- linux-2.6.16.46-0.12.orig/include/asm-i386/nmi.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-i386/nmi.h	2007-08-28 17:35:29.000000000 +0400
@@ -17,6 +17,7 @@ typedef int (*nmi_callback_t)(struct pt_
  * set. Return 1 if the NMI was handled.
  */
 void set_nmi_callback(nmi_callback_t callback);
+void set_nmi_ipi_callback(nmi_callback_t callback);
  
 /** 
  * unset_nmi_callback
@@ -24,5 +25,6 @@ void set_nmi_callback(nmi_callback_t cal
  * Remove the handler previously set.
  */
 void unset_nmi_callback(void);
+void unset_nmi_ipi_callback(void);
  
 #endif /* ASM_NMI_H */
diff -upr linux-2.6.16.46-0.12.orig/include/asm-i386/processor.h linux-2.6.16.46-0.12-027test011/include/asm-i386/processor.h
--- linux-2.6.16.46-0.12.orig/include/asm-i386/processor.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/asm-i386/processor.h	2007-08-28 17:35:30.000000000 +0400
@@ -212,6 +212,21 @@ static inline unsigned int cpuid_edx(uns
 	return edx;
 }
 
+#ifdef CONFIG_SMP
+void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op);
+#else
+static inline void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+	cpuid(op, eax, ebx, ecx, edx);
+}
+
+static inline u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op)
+{
+	return cpuid_eax(op);
+}
+#endif
+
 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
 
 /*
diff -upr linux-2.6.16.46-0.12.orig/include/asm-i386/thread_info.h linux-2.6.16.46-0.12-027test011/include/asm-i386/thread_info.h
--- linux-2.6.16.46-0.12.orig/include/asm-i386/thread_info.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-i386/thread_info.h	2007-08-28 17:35:33.000000000 +0400
@@ -101,13 +101,13 @@ register unsigned long current_stack_poi
 	({							\
 		struct thread_info *ret;			\
 								\
-		ret = kmalloc(THREAD_SIZE, GFP_KERNEL);		\
+		ret = kmalloc(THREAD_SIZE, GFP_KERNEL_UBC);	\
 		if (ret)					\
 			memset(ret, 0, THREAD_SIZE);		\
 		ret;						\
 	})
 #else
-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL_UBC)
 #endif
 
 #define free_thread_info(info)	kfree(info)
@@ -142,7 +142,8 @@ register unsigned long current_stack_poi
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
-#define TIF_MEMDIE		17
+#define TIF_FREEZE		17	/* Freeze request, atomic version of PF_FREEZE */
+#define TIF_MEMDIE		18
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
diff -upr linux-2.6.16.46-0.12.orig/include/asm-i386/timex.h linux-2.6.16.46-0.12-027test011/include/asm-i386/timex.h
--- linux-2.6.16.46-0.12.orig/include/asm-i386/timex.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-i386/timex.h	2007-08-28 17:35:31.000000000 +0400
@@ -36,13 +36,17 @@ static inline cycles_t get_cycles (void)
 {
 	unsigned long long ret=0;
 
-#ifndef CONFIG_X86_TSC
-	if (!cpu_has_tsc)
-		return 0;
-#endif
-
 #if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
 	rdtscll(ret);
+#elif defined(CONFIG_VE)
+	/*
+	 * get_cycles is used in the following calculations:
+	 * - VPS idle and iowait times in kernel/sched.h
+	 * - task's sleep time to be shown with SysRq-t
+	 * - kstat latencies in linux/vzstat.h
+	 * - sched latency via wakeup_stamp in linux/ve_task.h
+	 */
+#warning "some of VPS statistics won't be correct without get_cycles() (kstat_lat, ve_idle, etc)"
 #endif
 	return ret;
 }
diff -upr linux-2.6.16.46-0.12.orig/include/asm-i386/unistd.h linux-2.6.16.46-0.12-027test011/include/asm-i386/unistd.h
--- linux-2.6.16.46-0.12.orig/include/asm-i386/unistd.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-i386/unistd.h	2007-08-28 17:35:34.000000000 +0400
@@ -316,8 +316,20 @@
 #define __NR_pselect6		308
 #define __NR_ppoll		309
 #define __NR_unshare		310
+#define __NR_fairsched_mknod	500     /* FairScheduler syscalls */
+#define __NR_fairsched_rmnod	501
+#define __NR_fairsched_chwt	502
+#define __NR_fairsched_mvpr	503
+#define __NR_fairsched_rate	504
+#define __NR_fairsched_vcpus	505
+#define __NR_getluid		510
+#define __NR_setluid		511
+#define __NR_setublimit		512
+#define __NR_ubstat		513
+#define __NR_lchmod		516
+#define __NR_lutime		517
 
-#define NR_syscalls 311
+#define NR_syscalls 518
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
diff -upr linux-2.6.16.46-0.12.orig/include/asm-ia64/mman.h linux-2.6.16.46-0.12-027test011/include/asm-ia64/mman.h
--- linux-2.6.16.46-0.12.orig/include/asm-ia64/mman.h	2007-08-24 19:28:08.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/asm-ia64/mman.h	2007-08-28 17:35:30.000000000 +0400
@@ -18,6 +18,7 @@
 #define MAP_NORESERVE	0x04000		/* don't check for reservations */
 #define MAP_POPULATE	0x08000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO	0x20000		/* soft ubc charge */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff -upr linux-2.6.16.46-0.12.orig/include/asm-ia64/pgalloc.h linux-2.6.16.46-0.12-027test011/include/asm-ia64/pgalloc.h
--- linux-2.6.16.46-0.12.orig/include/asm-ia64/pgalloc.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-ia64/pgalloc.h	2007-08-28 17:35:30.000000000 +0400
@@ -20,6 +20,8 @@
 #include <linux/page-flags.h>
 #include <linux/threads.h>
 
+#include <ub/ub_mem.h>
+
 #include <asm/mmu_context.h>
 
 DECLARE_PER_CPU(unsigned long *, __pgtable_quicklist);
@@ -38,7 +40,7 @@ static inline long pgtable_quicklist_tot
 	return ql_size;
 }
 
-static inline void *pgtable_quicklist_alloc(void)
+static inline void *pgtable_quicklist_alloc(int charge)
 {
 	unsigned long *ret = NULL;
 
@@ -46,13 +48,21 @@ static inline void *pgtable_quicklist_al
 
 	ret = pgtable_quicklist;
 	if (likely(ret != NULL)) {
+		if (ub_page_charge(virt_to_page(ret), 0,
+					charge ? __GFP_UBC|__GFP_SOFT_UBC : 0)) {
+			ret = NULL;
+			goto out;
+		}
+
 		pgtable_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
 		--pgtable_quicklist_size;
+out:
 		preempt_enable();
 	} else {
 		preempt_enable();
-		ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+		ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO |
+				(charge ? __GFP_UBC | __GFP_SOFT_UBC : 0));
 	}
 
 	return ret;
@@ -70,6 +80,7 @@ static inline void pgtable_quicklist_fre
 #endif
 
 	preempt_disable();
+	ub_page_uncharge(virt_to_page(pgtable_entry), 0);
 	*(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist;
 	pgtable_quicklist = (unsigned long *)pgtable_entry;
 	++pgtable_quicklist_size;
@@ -78,7 +89,7 @@ static inline void pgtable_quicklist_fre
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(1);
 }
 
 static inline void pgd_free(pgd_t * pgd)
@@ -95,7 +106,7 @@ pgd_populate(struct mm_struct *mm, pgd_t
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(1);
 }
 
 static inline void pud_free(pud_t * pud)
@@ -113,7 +124,7 @@ pud_populate(struct mm_struct *mm, pud_t
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(1);
 }
 
 static inline void pmd_free(pmd_t * pmd)
@@ -138,13 +149,14 @@ pmd_populate_kernel(struct mm_struct *mm
 static inline struct page *pte_alloc_one(struct mm_struct *mm,
 					 unsigned long addr)
 {
-	return virt_to_page(pgtable_quicklist_alloc());
+	void * pg = pgtable_quicklist_alloc(1);
+	return pg ? virt_to_page(pg) : NULL;
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long addr)
 {
-	return pgtable_quicklist_alloc();
+	return pgtable_quicklist_alloc(0);
 }
 
 static inline void pte_free(struct page *pte)
diff -upr linux-2.6.16.46-0.12.orig/include/asm-ia64/processor.h linux-2.6.16.46-0.12-027test011/include/asm-ia64/processor.h
--- linux-2.6.16.46-0.12.orig/include/asm-ia64/processor.h	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/asm-ia64/processor.h	2007-08-28 17:35:31.000000000 +0400
@@ -307,7 +307,7 @@ struct thread_struct {
 	regs->loadrs = 0;									\
 	regs->r8 = current->mm->dumpable;	/* set "don't zap registers" flag */		\
 	regs->r12 = new_sp - 16;	/* allocate 16 byte scratch area */			\
-	if (unlikely(!current->mm->dumpable)) {							\
+	if (unlikely(!current->mm->dumpable || !current->mm->vps_dumpable)) {			\
 		/*										\
 		 * Zap scratch regs to avoid leaking bits between processes with different	\
 		 * uid/privileges.								\
diff -upr linux-2.6.16.46-0.12.orig/include/asm-ia64/thread_info.h linux-2.6.16.46-0.12-027test011/include/asm-ia64/thread_info.h
--- linux-2.6.16.46-0.12.orig/include/asm-ia64/thread_info.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/asm-ia64/thread_info.h	2007-08-28 17:35:30.000000000 +0400
@@ -91,15 +91,18 @@ struct thread_info {
 #define TIF_SYSCALL_TRACE	3	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	4	/* syscall auditing active */
 #define TIF_SIGDELAYED		5	/* signal delayed from MCA/INIT/NMI/PMI context */
+#define TIF_RESTORE_SIGMASK	6	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17
 #define TIF_MCA_INIT		18	/* this task is processing MCA or INIT */
 #define TIF_DB_DISABLED		19	/* debug trap disabled for fsyscall */
+#define TIF_FREEZE		20	/* Freeze request, atomic version of PF_FREEZE */ 
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEAUDIT	(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
+#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_SIGDELAYED		(1 << TIF_SIGDELAYED)
@@ -108,7 +111,7 @@ struct thread_info {
 #define _TIF_DB_DISABLED	(1 << TIF_DB_DISABLED)
 
 /* "work to do on user-return" bits */
-#define TIF_ALLWORK_MASK	(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SIGDELAYED)
+#define TIF_ALLWORK_MASK	(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SIGDELAYED|_TIF_RESTORE_SIGMASK)
 /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
 #define TIF_WORK_MASK		(TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
 
diff -upr linux-2.6.16.46-0.12.orig/include/asm-ia64/timex.h linux-2.6.16.46-0.12-027test011/include/asm-ia64/timex.h
--- linux-2.6.16.46-0.12.orig/include/asm-ia64/timex.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-ia64/timex.h	2007-08-28 17:35:30.000000000 +0400
@@ -10,6 +10,7 @@
  *			Also removed cacheflush_time as it's entirely unused.
  */
 
+#ifdef __KERNEL__
 #include <asm/intrinsics.h>
 #include <asm/processor.h>
 
@@ -39,4 +40,8 @@ get_cycles (void)
 	return ret;
 }
 
+extern unsigned int cpu_khz;
+
+#endif
+
 #endif /* _ASM_IA64_TIMEX_H */
diff -upr linux-2.6.16.46-0.12.orig/include/asm-ia64/unistd.h linux-2.6.16.46-0.12-027test011/include/asm-ia64/unistd.h
--- linux-2.6.16.46-0.12.orig/include/asm-ia64/unistd.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-ia64/unistd.h	2007-08-28 17:35:34.000000000 +0400
@@ -285,14 +285,27 @@
 #define __NR_faccessat			1293
 /* 1294, 1295 reserved for pselect/ppoll */
 #define __NR_unshare			1296
+#define __NR_fairsched_vcpus		1499
+#define __NR_fairsched_mknod		1500
+#define __NR_fairsched_rmnod		1501
+#define __NR_fairsched_chwt		1502
+#define __NR_fairsched_mvpr		1503
+#define __NR_fairsched_rate		1504
+#define __NR_getluid			1505
+#define __NR_setluid			1506
+#define __NR_setublimit			1507
+#define __NR_ubstat			1508
+#define __NR_lchmod			1509
+#define __NR_lutime			1510
 
 #ifdef __KERNEL__
 
 #include <linux/config.h>
 
-#define NR_syscalls			273 /* length of syscall table */
+#define NR_syscalls			487 /* length of syscall table */
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 
 #ifdef CONFIG_IA32_SUPPORT
 # define __ARCH_WANT_SYS_FADVISE64
@@ -303,6 +316,7 @@
 # define __ARCH_WANT_SYS_OLDUMOUNT
 # define __ARCH_WANT_SYS_SIGPENDING
 # define __ARCH_WANT_SYS_SIGPROCMASK
+# define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 # define __ARCH_WANT_COMPAT_SYS_TIME
 #endif
 
diff -upr linux-2.6.16.46-0.12.orig/include/asm-powerpc/mman.h linux-2.6.16.46-0.12-027test011/include/asm-powerpc/mman.h
--- linux-2.6.16.46-0.12.orig/include/asm-powerpc/mman.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-powerpc/mman.h	2007-08-28 17:35:30.000000000 +0400
@@ -23,5 +23,6 @@
 
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO   0x20000         /* do soft ubc charge */
 
 #endif	/* _ASM_POWERPC_MMAN_H */
diff -upr linux-2.6.16.46-0.12.orig/include/asm-powerpc/pgalloc.h linux-2.6.16.46-0.12-027test011/include/asm-powerpc/pgalloc.h
--- linux-2.6.16.46-0.12.orig/include/asm-powerpc/pgalloc.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-powerpc/pgalloc.h	2007-08-28 17:35:30.000000000 +0400
@@ -33,7 +33,8 @@ extern kmem_cache_t *pgtable_cache[];
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
+	return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM],
+			GFP_KERNEL_UBC | __GFP_SOFT_UBC);
 }
 
 static inline void pgd_free(pgd_t *pgd)
@@ -48,7 +49,7 @@ static inline void pgd_free(pgd_t *pgd)
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
-				GFP_KERNEL|__GFP_REPEAT);
+				GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT);
 }
 
 static inline void pud_free(pud_t *pud)
@@ -84,7 +85,7 @@ static inline void pmd_populate_kernel(s
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
-				GFP_KERNEL|__GFP_REPEAT);
+				GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT);
 }
 
 static inline void pmd_free(pmd_t *pmd)
@@ -92,17 +93,29 @@ static inline void pmd_free(pmd_t *pmd)
 	kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
 }
 
+/* Allocate one PTE page from pgtable_cache[PTE_CACHE_NUM] using @flags. */
+static inline pte_t *do_pte_alloc(gfp_t flags)
+{
+	return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], flags);
+}
+
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
-				GFP_KERNEL|__GFP_REPEAT);
+	return do_pte_alloc(GFP_KERNEL | __GFP_REPEAT);
 }
 
 static inline struct page *pte_alloc_one(struct mm_struct *mm,
 					 unsigned long address)
 {
-	return virt_to_page(pte_alloc_one_kernel(mm, address));
+	/*
+	 * Same retry policy as pte_alloc_one_kernel(); a failed allocation
+	 * must be reported as NULL, not passed through virt_to_page().
+	 */
+	pte_t *pte = do_pte_alloc(GFP_KERNEL_UBC | __GFP_SOFT_UBC |
+			__GFP_REPEAT);
+
+	return pte ? virt_to_page(pte) : NULL;
 }
 		
 static inline void pte_free_kernel(pte_t *pte)
diff -upr linux-2.6.16.46-0.12.orig/include/asm-powerpc/thread_info.h linux-2.6.16.46-0.12-027test011/include/asm-powerpc/thread_info.h
--- linux-2.6.16.46-0.12.orig/include/asm-powerpc/thread_info.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-powerpc/thread_info.h	2007-08-28 17:35:29.000000000 +0400
@@ -121,6 +121,8 @@ static inline struct thread_info *curren
 #define TIF_RESTOREALL		12	/* Restore all regs (implies NOERROR) */
 #define TIF_NOERROR		14	/* Force successful syscall return */
 #define TIF_RESTORE_SIGMASK	15	/* Restore signal mask in do_signal */
+#define TIF_FREEZE		16	/* Freeze request, atomic version
+					   of PF_FREEZE */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
diff -upr linux-2.6.16.46-0.12.orig/include/asm-powerpc/unistd.h linux-2.6.16.46-0.12-027test011/include/asm-powerpc/unistd.h
--- linux-2.6.16.46-0.12.orig/include/asm-powerpc/unistd.h	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/asm-powerpc/unistd.h	2007-08-28 17:35:34.000000000 +0400
@@ -327,8 +327,17 @@
 #define __NR_get_robust_list	299
 #define __NR_set_robust_list	300
 */
+#define __NR_fairsched_mknod	400
+#define __NR_fairsched_rmnod	401
+#define __NR_fairsched_chwt	402
+#define __NR_fairsched_mvpr	403
+#define __NR_fairsched_rate	404
+#define __NR_getluid		410
+#define __NR_setluid		411
+#define __NR_setublimit		412
+#define __NR_ubstat		413
 
-#define __NR_syscalls		301
+#define __NR_syscalls		414
 
 #ifdef __KERNEL__
 #define __NR__exit __NR_exit
diff -upr linux-2.6.16.46-0.12.orig/include/asm-s390/pgalloc.h linux-2.6.16.46-0.12-027test011/include/asm-s390/pgalloc.h
--- linux-2.6.16.46-0.12.orig/include/asm-s390/pgalloc.h	2007-08-24 19:28:22.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/asm-s390/pgalloc.h	2007-08-28 17:35:30.000000000 +0400
@@ -40,14 +40,16 @@ extern void diag10(unsigned long addr);
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	pgd_t *pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ALLOC_ORDER);
+	pgd_t *pgd = (pgd_t *) __get_free_pages(GFP_KERNEL_UBC |
+					__GFP_SOFT_UBC, PGD_ALLOC_ORDER);
 	int i;
 
 	if (!pgd)
 		return NULL;
 	if (s390_noexec) {
 		pgd_t *shadow_pgd = (pgd_t *)
-			__get_free_pages(GFP_KERNEL, PGD_ALLOC_ORDER);
+			__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC,
+							PGD_ALLOC_ORDER);
 		struct page *page = virt_to_page(pgd);
 
 		if (!shadow_pgd) {
@@ -87,14 +89,16 @@ static inline void pgd_free(pgd_t *pgd)
 #else /* __s390x__ */
 static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
-	pmd_t *pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ALLOC_ORDER);
+	pmd_t *pmd = (pmd_t *) __get_free_pages(GFP_KERNEL_UBC |
+					__GFP_SOFT_UBC, PMD_ALLOC_ORDER);
 	int i;
 
 	if (!pmd)
 		return NULL;
 	if (s390_noexec) {
 		pmd_t *shadow_pmd = (pmd_t *)
-			__get_free_pages(GFP_KERNEL, PMD_ALLOC_ORDER);
+			__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC,
+							PMD_ALLOC_ORDER);
 		struct page *page = virt_to_page(pmd);
 
 		if (!shadow_pmd) {
@@ -161,15 +165,13 @@ pmd_populate(struct mm_struct *mm, pmd_t
 		pmd_populate_kernel(mm, shadow_pmd, shadow_pte);
 }
 
-/*
- * page table entry allocation/free routines.
- */
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
+static inline pte_t *pte_alloc(struct mm_struct *mm, unsigned long vmaddr,
+		gfp_t mask)
 {
-	pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	pte_t *pte;
 	int i;
 
+	pte = (pte_t *)__get_free_page(mask);
 	if (!pte)
 		return NULL;
 	if (s390_noexec) {
@@ -190,10 +192,20 @@ pte_alloc_one_kernel(struct mm_struct *m
 	return pte;
 }
 
+/*
+ * page table entry allocation/free routines.
+ */
+static inline pte_t *
+pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
+{
+	return pte_alloc(mm, vmaddr, GFP_KERNEL | __GFP_REPEAT);
+}
+
 static inline struct page *
 pte_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, vmaddr);
+	pte_t *pte = pte_alloc(mm, vmaddr, GFP_KERNEL_UBC | __GFP_SOFT_UBC |
+			__GFP_REPEAT);
 	if (pte)
 		return virt_to_page(pte);
 	return 0;
diff -upr linux-2.6.16.46-0.12.orig/include/asm-sh64/pgalloc.h linux-2.6.16.46-0.12-027test011/include/asm-sh64/pgalloc.h
--- linux-2.6.16.46-0.12.orig/include/asm-sh64/pgalloc.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-sh64/pgalloc.h	2007-08-28 17:35:31.000000000 +0400
@@ -173,7 +173,7 @@ static inline void set_pgdir(unsigned lo
 	pgd_t *pgd;
 
 	read_lock(&tasklist_lock);
-	for_each_process(p) {
+	for_each_process_all(p) {
 		if (!p->mm)
 			continue;
 		*pgd_offset(p->mm,address) = entry;
diff -upr linux-2.6.16.46-0.12.orig/include/asm-sparc64/mman.h linux-2.6.16.46-0.12-027test011/include/asm-sparc64/mman.h
--- linux-2.6.16.46-0.12.orig/include/asm-sparc64/mman.h	2007-08-24 19:28:08.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/asm-sparc64/mman.h	2007-08-28 17:35:30.000000000 +0400
@@ -21,6 +21,7 @@
 
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO	0x20000		/* do soft ubc charge */
 
 /* XXX Need to add flags to SunOS's mctl, mlockall, and madvise system
  * XXX calls.
diff -upr linux-2.6.16.46-0.12.orig/include/asm-sparc64/pgalloc.h linux-2.6.16.46-0.12-027test011/include/asm-sparc64/pgalloc.h
--- linux-2.6.16.46-0.12.orig/include/asm-sparc64/pgalloc.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-sparc64/pgalloc.h	2007-08-28 17:35:30.000000000 +0400
@@ -43,13 +43,21 @@ static __inline__ pgd_t *get_pgd_fast(vo
 
 	preempt_disable();
 	if((ret = pgd_quicklist) != NULL) {
+		if (ub_page_charge(virt_to_page(ret), 0,
+					__GFP_UBC | __GFP_SOFT_UBC)) {
+			/* charge refused: page stays on the quicklist */
+			ret = NULL;
+			goto out;
+		}
 		pgd_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
 		pgtable_cache_size--;
+out:
 		preempt_enable();
 	} else {
 		preempt_enable();
-		ret = (unsigned long *) __get_free_page(GFP_KERNEL|__GFP_REPEAT);
+		ret = (unsigned long *) __get_free_page(GFP_KERNEL_UBC |
+				__GFP_REPEAT);
 		if(ret)
 			memset(ret, 0, PAGE_SIZE);
 	}
@@ -81,10 +89,17 @@ static __inline__ pmd_t *pmd_alloc_one_f
 		color = 1;
 
 	if((ret = (unsigned long *)pte_quicklist[color]) != NULL) {
+		if (ub_page_charge(virt_to_page(ret), 0,
+					__GFP_UBC | __GFP_SOFT_UBC)) {
+			/* charge refused: page stays on the quicklist */
+			ret = NULL;
+			goto out;
+		}
 		pte_quicklist[color] = (unsigned long *)(*ret);
 		ret[0] = 0;
 		pgtable_cache_size--;
 	}
+out:
 	preempt_enable();
 
 	return (pmd_t *)ret;
@@ -129,9 +144,16 @@ static inline struct page *
 pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	pte_t *pte = pte_alloc_one_kernel(mm, addr);
+	struct page *pg;
 
-	if (pte)
-		return virt_to_page(pte);
+	if (pte) {
+		pg = virt_to_page(pte);
+		if (ub_page_charge(pg, 0, __GFP_UBC | __GFP_SOFT_UBC)) {
+			pte_free(pg);
+			pg = NULL;
+		}
+		return pg;
+	}
 
 	return NULL;
 }
@@ -143,10 +165,17 @@ static __inline__ pte_t *pte_alloc_one_f
 
 	preempt_disable();
 	if((ret = (unsigned long *)pte_quicklist[color]) != NULL) {
+		if (ub_page_charge(virt_to_page(ret), 0,
+					__GFP_UBC | __GFP_SOFT_UBC)) {
+			/* charge refused: page stays on the quicklist */
+			ret = NULL;
+			goto out;
+		}
 		pte_quicklist[color] = (unsigned long *)(*ret);
 		ret[0] = 0;
 		pgtable_cache_size--;
 	}
+out:
 	preempt_enable();
 	return (pte_t *)ret;
 }
diff -upr linux-2.6.16.46-0.12.orig/include/asm-sparc64/thread_info.h linux-2.6.16.46-0.12-027test011/include/asm-sparc64/thread_info.h
--- linux-2.6.16.46-0.12.orig/include/asm-sparc64/thread_info.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-sparc64/thread_info.h	2007-08-28 17:35:33.000000000 +0400
@@ -165,14 +165,14 @@ register struct thread_info *current_thr
 	struct thread_info *ret;				\
 								\
 	ret = (struct thread_info *)				\
-	  __get_free_pages(GFP_KERNEL, __THREAD_INFO_ORDER);	\
+	  __get_free_pages(GFP_KERNEL_UBC, __THREAD_INFO_ORDER);\
 	if (ret)						\
 		memset(ret, 0, PAGE_SIZE<<__THREAD_INFO_ORDER);	\
 	ret;							\
 })
 #else
 #define alloc_thread_info(tsk) \
-	((struct thread_info *)__get_free_pages(GFP_KERNEL, __THREAD_INFO_ORDER))
+	((struct thread_info *)__get_free_pages(GFP_KERNEL_UBC, __THREAD_INFO_ORDER))
 #endif
 
 #define free_thread_info(ti) \
@@ -239,6 +239,7 @@ register struct thread_info *current_thr
 #define TIF_ABI_PENDING		12
 #define TIF_MEMDIE		13
 #define TIF_POLLING_NRFLAG	14
+#define TIF_FREEZE		15	/* Freeze request (atomic PF_FREEZE) */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
diff -upr linux-2.6.16.46-0.12.orig/include/asm-x86_64/mman.h linux-2.6.16.46-0.12-027test011/include/asm-x86_64/mman.h
--- linux-2.6.16.46-0.12.orig/include/asm-x86_64/mman.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-x86_64/mman.h	2007-08-28 17:35:30.000000000 +0400
@@ -12,6 +12,7 @@
 #define MAP_NORESERVE	0x4000		/* don't check for reservations */
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_EXECPRIO	0x20000		/* soft ubc charge */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
diff -upr linux-2.6.16.46-0.12.orig/include/asm-x86_64/msr.h linux-2.6.16.46-0.12-027test011/include/asm-x86_64/msr.h
--- linux-2.6.16.46-0.12.orig/include/asm-x86_64/msr.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-x86_64/msr.h	2007-08-28 17:35:30.000000000 +0400
@@ -149,6 +149,33 @@ static inline unsigned int cpuid_edx(uns
 #define MSR_IA32_UCODE_WRITE		0x79
 #define MSR_IA32_UCODE_REV		0x8b
 
+#ifdef CONFIG_SMP
+void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
+void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
+
+void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op);
+#else
+static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+	rdmsr(msr_no, *l, *h);
+}
+
+static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	wrmsr(msr_no, l, h);
+}
+
+static inline void cpuid_on_cpu(unsigned int cpu, u32 op, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+	cpuid(op, eax, ebx, ecx, edx);
+}
+
+static inline u32 cpuid_eax_on_cpu(unsigned int cpu, u32 op)
+{
+	return cpuid_eax(op);
+}
+#endif
 
 #endif
 
diff -upr linux-2.6.16.46-0.12.orig/include/asm-x86_64/nmi.h linux-2.6.16.46-0.12-027test011/include/asm-x86_64/nmi.h
--- linux-2.6.16.46-0.12.orig/include/asm-x86_64/nmi.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-x86_64/nmi.h	2007-08-28 17:35:29.000000000 +0400
@@ -24,6 +24,9 @@ void set_nmi_callback(nmi_callback_t cal
  * Remove the handler previously set.
  */
 void unset_nmi_callback(void);
+
+void set_nmi_ipi_callback(nmi_callback_t callback);
+void unset_nmi_ipi_callback(void);
  
 #ifdef CONFIG_PM
  
diff -upr linux-2.6.16.46-0.12.orig/include/asm-x86_64/pgalloc.h linux-2.6.16.46-0.12-027test011/include/asm-x86_64/pgalloc.h
--- linux-2.6.16.46-0.12.orig/include/asm-x86_64/pgalloc.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-x86_64/pgalloc.h	2007-08-28 17:35:30.000000000 +0400
@@ -31,12 +31,14 @@ static inline void pmd_free(pmd_t *pmd)
 
 static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
 {
-	return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pmd_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
+			__GFP_SOFT_UBC);
 }
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pud_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
+			__GFP_SOFT_UBC);
 }
 
 static inline void pud_free (pud_t *pud)
@@ -48,7 +50,8 @@ static inline void pud_free (pud_t *pud)
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	unsigned boundary;
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL_UBC|__GFP_REPEAT|
+			__GFP_SOFT_UBC);
 	if (!pgd)
 		return NULL;
 	/*
@@ -77,7 +80,8 @@ static inline pte_t *pte_alloc_one_kerne
 
 static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	void *p = (void *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
+			__GFP_SOFT_UBC);
 	if (!p)
 		return NULL;
 	return virt_to_page(p);
diff -upr linux-2.6.16.46-0.12.orig/include/asm-x86_64/processor.h linux-2.6.16.46-0.12-027test011/include/asm-x86_64/processor.h
--- linux-2.6.16.46-0.12.orig/include/asm-x86_64/processor.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/asm-x86_64/processor.h	2007-08-28 17:35:33.000000000 +0400
@@ -174,7 +174,7 @@ static inline void clear_in_cr4 (unsigne
  * /proc/pid/unmap_base is only supported for 32bit processes without
  * 3GB personality for now.
  */
-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
+#define IA32_PAGE_OFFSET 0xc0000000
 
 #define TASK_SIZE 		(test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)
 #define TASK_SIZE_OF(child) 	((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64)
diff -upr linux-2.6.16.46-0.12.orig/include/asm-x86_64/segment.h linux-2.6.16.46-0.12-027test011/include/asm-x86_64/segment.h
--- linux-2.6.16.46-0.12.orig/include/asm-x86_64/segment.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-x86_64/segment.h	2007-08-28 17:35:33.000000000 +0400
@@ -3,29 +3,28 @@
 
 #include <asm/cache.h>
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
-
-#define __KERNEL32_CS   0x38
-
+#define GDT_ENTRY_BOOT_CS		2
+#define __BOOT_CS	(GDT_ENTRY_BOOT_CS * 8)
+#define GDT_ENTRY_BOOT_DS		3
+#define __BOOT_DS	(GDT_ENTRY_BOOT_DS * 8)
+#define GDT_ENTRY_TSS 4	/* needs two entries */
 /* 
  * we cannot use the same code segment descriptor for user and kernel
  * -- not even in the long flat mode, because of different DPL /kkeil 
  * The segment offset needs to contain a RPL. Grr. -AK
  * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) 
  */
+#define GDT_ENTRY_TLS_MIN 6
+#define GDT_ENTRY_TLS_MAX 8
 
-#define __USER32_CS   0x23   /* 4*8+3 */ 
-#define __USER_DS     0x2b   /* 5*8+3 */ 
-#define __USER_CS     0x33   /* 6*8+3 */ 
+#define GDT_ENTRY_LDT 9 /* needs two entries */
+#define __KERNEL32_CS   0x58	/* 11*8 */
+#define __KERNEL_CS	0x60	/* 12*8 */
+#define __KERNEL_DS	0x68	/* 13*8 */
+#define __USER32_CS   0x73   /* 14*8+3 */
+#define __USER_DS     0x7b   /* 15*8+3 */
 #define __USER32_DS	__USER_DS 
-
-#define GDT_ENTRY_TLS 1
-#define GDT_ENTRY_TSS 8	/* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
-/* 15 free */
+#define __USER_CS     0x83   /* 16*8+3 */
 
 #define GDT_ENTRY_TLS_ENTRIES 3
 
@@ -37,7 +36,7 @@
 #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
 
 #define IDT_ENTRIES 256
-#define GDT_ENTRIES 16
+#define GDT_ENTRIES 32
 #define GDT_SIZE (GDT_ENTRIES * 8)
 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) 
 
diff -upr linux-2.6.16.46-0.12.orig/include/asm-x86_64/signal.h linux-2.6.16.46-0.12-027test011/include/asm-x86_64/signal.h
--- linux-2.6.16.46-0.12.orig/include/asm-x86_64/signal.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-x86_64/signal.h	2007-08-28 17:35:33.000000000 +0400
@@ -23,11 +23,6 @@ typedef struct {
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
-
-struct pt_regs; 
-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
-
-
 #else
 /* Here we must cater to libcs that poke about in kernel headers.  */
 
diff -upr linux-2.6.16.46-0.12.orig/include/asm-x86_64/thread_info.h linux-2.6.16.46-0.12-027test011/include/asm-x86_64/thread_info.h
--- linux-2.6.16.46-0.12.orig/include/asm-x86_64/thread_info.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-x86_64/thread_info.h	2007-08-28 17:35:33.000000000 +0400
@@ -74,7 +74,7 @@ static inline struct thread_info *stack_
 
 /* thread information allocation */
 #define alloc_thread_info(tsk) \
-	((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER))
+	((struct thread_info *) __get_free_pages(GFP_KERNEL_UBC,THREAD_ORDER))
 #define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
 
 #else /* !__ASSEMBLY__ */
@@ -101,11 +101,14 @@ static inline struct thread_info *stack_
 #define TIF_IRET		5	/* force IRET */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
+#define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_IA32		17	/* 32bit process */ 
 #define TIF_FORK		18	/* ret_from_fork */
 #define TIF_ABI_PENDING		19
-#define TIF_MEMDIE		20
+#define TIF_FREEZE		20
+#define TIF_MEMDIE		21
+#define TIF_RESUME		22
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -115,10 +118,12 @@ static inline struct thread_info *stack_
 #define _TIF_IRET		(1<<TIF_IRET)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_IA32		(1<<TIF_IA32)
 #define _TIF_FORK		(1<<TIF_FORK)
 #define _TIF_ABI_PENDING	(1<<TIF_ABI_PENDING)
+#define _TIF_RESUME		(1<<TIF_RESUME)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
diff -upr linux-2.6.16.46-0.12.orig/include/asm-x86_64/unistd.h linux-2.6.16.46-0.12-027test011/include/asm-x86_64/unistd.h
--- linux-2.6.16.46-0.12.orig/include/asm-x86_64/unistd.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/asm-x86_64/unistd.h	2007-08-28 17:35:34.000000000 +0400
@@ -605,8 +605,32 @@ __SYSCALL(__NR_pselect6, sys_ni_syscall)
 __SYSCALL(__NR_ppoll,	sys_ni_syscall)		/* for now */
 #define __NR_unshare		272
 __SYSCALL(__NR_unshare,	sys_unshare)
+#define __NR_fairsched_vcpus	499
+__SYSCALL(__NR_fairsched_vcpus, sys_fairsched_vcpus)
+#define __NR_getluid		500
+__SYSCALL(__NR_getluid, sys_getluid)
+#define __NR_setluid		501
+__SYSCALL(__NR_setluid, sys_setluid)
+#define __NR_setublimit		502
+__SYSCALL(__NR_setublimit, sys_setublimit)
+#define __NR_ubstat		503
+__SYSCALL(__NR_ubstat, sys_ubstat)
+#define __NR_fairsched_mknod	504 /* FairScheduler syscalls */
+__SYSCALL(__NR_fairsched_mknod, sys_fairsched_mknod)
+#define __NR_fairsched_rmnod	505
+__SYSCALL(__NR_fairsched_rmnod, sys_fairsched_rmnod)
+#define __NR_fairsched_chwt	506
+__SYSCALL(__NR_fairsched_chwt, sys_fairsched_chwt)
+#define __NR_fairsched_mvpr	507
+__SYSCALL(__NR_fairsched_mvpr, sys_fairsched_mvpr)
+#define __NR_fairsched_rate	508
+__SYSCALL(__NR_fairsched_rate, sys_fairsched_rate)
+#define __NR_lchmod		509
+__SYSCALL(__NR_lchmod, sys_lchmod)
+#define __NR_lutime		510
+__SYSCALL(__NR_lutime, sys_lutime)
 
-#define __NR_syscall_max __NR_unshare
+#define __NR_syscall_max __NR_lutime
 
 #ifndef __NO_STUBS
 
@@ -645,6 +669,7 @@ do { \
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #endif
 
 #ifndef __KERNEL_SYSCALLS__
diff -upr linux-2.6.16.46-0.12.orig/include/linux/aio.h linux-2.6.16.46-0.12-027test011/include/linux/aio.h
--- linux-2.6.16.46-0.12.orig/include/linux/aio.h	2007-08-24 19:28:32.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/aio.h	2007-08-28 17:35:33.000000000 +0400
@@ -252,4 +252,8 @@ static inline struct kiocb *list_kiocb(s
 extern unsigned long aio_nr;
 extern unsigned long aio_max_nr;
 
+void wait_for_all_aios(struct kioctx *ctx);
+extern kmem_cache_t	*kioctx_cachep;
+extern void aio_kick_handler(void *);
+
 #endif /* __LINUX__AIO_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/auto_fs.h linux-2.6.16.46-0.12-027test011/include/linux/auto_fs.h
--- linux-2.6.16.46-0.12.orig/include/linux/auto_fs.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/auto_fs.h	2007-08-28 17:35:30.000000000 +0400
@@ -51,6 +51,8 @@ typedef unsigned int autofs_wqt_t;
 typedef unsigned long autofs_wqt_t;
 #endif
 
+typedef __u32 autofs_wqt_t_32bit;
+
 /* Packet types */
 #define autofs_ptype_missing	0	/* Missing entry (mount request) */
 #define autofs_ptype_expire	1	/* Expire entry (umount request) */
@@ -67,6 +69,13 @@ struct autofs_packet_missing {
 	char name[NAME_MAX+1];
 };	
 
+struct autofs_packet_missing_32bit {
+	struct autofs_packet_hdr hdr;
+	autofs_wqt_t_32bit wait_queue_token;
+	int len;
+	char name[NAME_MAX+1];
+} __attribute__ ((__packed__));
+
 /* v3 expire (via ioctl) */
 struct autofs_packet_expire {
 	struct autofs_packet_hdr hdr;
@@ -74,6 +83,13 @@ struct autofs_packet_expire {
 	char name[NAME_MAX+1];
 };
 
+/* v3 expire (via ioctl) for 32 bit userspace daemon and x86_64 kernel */
+struct autofs_packet_expire_32bit {
+	struct autofs_packet_hdr hdr;
+	int len;
+	char name[NAME_MAX+1];
+} __attribute__ ((__packed__));
+
 #define AUTOFS_IOC_READY      _IO(0x93,0x60)
 #define AUTOFS_IOC_FAIL       _IO(0x93,0x61)
 #define AUTOFS_IOC_CATATONIC  _IO(0x93,0x62)
diff -upr linux-2.6.16.46-0.12.orig/include/linux/auto_fs4.h linux-2.6.16.46-0.12-027test011/include/linux/auto_fs4.h
--- linux-2.6.16.46-0.12.orig/include/linux/auto_fs4.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/auto_fs4.h	2007-08-28 17:35:30.000000000 +0400
@@ -40,11 +40,25 @@ struct autofs_packet_expire_multi {
 	char name[NAME_MAX+1];
 };
 
+/* v4 multi expire (via pipe) for 32 bit userspace daemon and x86_64 kernel */
+struct autofs_packet_expire_multi_32bit {
+	struct autofs_packet_hdr hdr;
+	autofs_wqt_t_32bit wait_queue_token;
+	int len;
+	char name[NAME_MAX+1];
+} __attribute__ ((__packed__));
+
 union autofs_packet_union {
 	struct autofs_packet_hdr hdr;
+	/* packet missing */
 	struct autofs_packet_missing missing;
+	struct autofs_packet_missing_32bit missing_32bit;
+	/* packet expire */
 	struct autofs_packet_expire expire;
+	struct autofs_packet_expire_32bit expire_32bit;
+	/* packet expire multi */
 	struct autofs_packet_expire_multi expire_multi;
+	struct autofs_packet_expire_multi_32bit expire_multi_32bit;
 };
 
 #define AUTOFS_IOC_EXPIRE_MULTI		_IOW(0x93,0x66,int)
diff -upr linux-2.6.16.46-0.12.orig/include/linux/capability.h linux-2.6.16.46-0.12-027test011/include/linux/capability.h
--- linux-2.6.16.46-0.12.orig/include/linux/capability.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/capability.h	2007-08-28 17:35:32.000000000 +0400
@@ -146,12 +146,9 @@ typedef __u32 kernel_cap_t;
 
 #define CAP_NET_BROADCAST    11
 
-/* Allow interface configuration */
 /* Allow administration of IP firewall, masquerading and accounting */
 /* Allow setting debug option on sockets */
 /* Allow modification of routing tables */
-/* Allow setting arbitrary process / process group ownership on
-   sockets */
 /* Allow binding to any address for transparent proxying */
 /* Allow setting TOS (type of service) */
 /* Allow setting promiscuous mode */
@@ -200,24 +197,19 @@ typedef __u32 kernel_cap_t;
 
 /* Allow configuration of the secure attention key */
 /* Allow administration of the random device */
-/* Allow examination and configuration of disk quotas */
 /* Allow configuring the kernel's syslog (printk behaviour) */
 /* Allow setting the domainname */
 /* Allow setting the hostname */
 /* Allow calling bdflush() */
-/* Allow mount() and umount(), setting up new smb connection */
+/* Allow setting up new smb connection */
 /* Allow some autofs root ioctls */
 /* Allow nfsservctl */
 /* Allow VM86_REQUEST_IRQ */
 /* Allow to read/write pci config on alpha */
 /* Allow irix_prctl on mips (setstacksize) */
 /* Allow flushing all cache on m68k (sys_cacheflush) */
-/* Allow removing semaphores */
-/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
-   and shared memory */
 /* Allow locking/unlocking of shared memory segment */
 /* Allow turning swap on/off */
-/* Allow forged pids on socket credentials passing */
 /* Allow setting readahead and flushing buffers on block devices */
 /* Allow setting geometry in floppy driver */
 /* Allow turning DMA on/off in xd driver */
@@ -288,11 +280,60 @@ typedef __u32 kernel_cap_t;
 
 #define CAP_AUDIT_CONTROL    30
 
+/*
+ * Important note: the VZ capabilities below deliberately reuse the
+ * CAP_AUDIT_* numbers (29 and 30) for compatibility reasons. This is
+ * harmless: both VZ and Audit/SELinux caps are disabled in VPSs.
+ */
+
+/* Allow access to all information. Without this capability some structures
+   are hidden so that different Virtual Environments on the same node cannot
+   interact with each other */
+#define CAP_SETVEID	     29
+
+#define CAP_VE_ADMIN	     30
+
 #ifdef __KERNEL__
+
+#include <linux/config.h>
+
+#ifdef CONFIG_VE
+
+/* Replacement for CAP_NET_ADMIN:
+   rights delegated to the Virtual Environment to administer its own network.
+   For now the following rights have been delegated:
+
+   Allow setting arbitrary process / process group ownership on sockets
+   Allow interface configuration
+ */
+#define CAP_VE_NET_ADMIN     CAP_VE_ADMIN
+
+/* Replacement for CAP_SYS_ADMIN:
+   rights delegated to the Virtual Environment to administer itself.
+   For now the following rights have been delegated:
+ */
+/* Allow mount/umount/remount */
+/* Allow examination and configuration of disk quotas */
+/* Allow removing semaphores */
+/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
+   and shared memory */
+/* Allow locking/unlocking of shared memory segment */
+/* Allow forged pids on socket credentials passing */
+
+#define CAP_VE_SYS_ADMIN     CAP_VE_ADMIN
+#else
+#define CAP_VE_NET_ADMIN     CAP_NET_ADMIN
+#define CAP_VE_SYS_ADMIN     CAP_SYS_ADMIN
+#endif
+
 /* 
  * Bounding set
  */
+#ifndef CONFIG_VE
 extern kernel_cap_t cap_bset;
+#else
+#define cap_bset get_exec_env()->ve_cap_bset
+#endif
 
 /*
  * Internal kernel functions only
@@ -352,12 +393,18 @@ static inline kernel_cap_t cap_invert(ke
 #define cap_issubset(a,set)  (!(cap_t(a) & ~cap_t(set)))
 
 #define cap_clear(c)         do { cap_t(c) =  0; } while(0)
+#ifndef CONFIG_VE
 #define cap_set_full(c)      do { cap_t(c) = ~0; } while(0)
+#else
+#define cap_set_full(c) \
+        do {cap_t(c) = ve_is_super(get_exec_env()) ? ~0 :		\
+					cap_bset; } while(0)
+#endif
 #define cap_mask(c,mask)     do { cap_t(c) &= cap_t(mask); } while(0)
-
 #define cap_is_fs_cap(c)     (CAP_TO_MASK(c) & CAP_FS_MASK)
 
 extern int capable(int cap);
+extern spinlock_t task_capability_lock;
 
 #endif /* __KERNEL__ */
 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/compat.h linux-2.6.16.46-0.12-027test011/include/linux/compat.h
--- linux-2.6.16.46-0.12.orig/include/linux/compat.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/compat.h	2007-08-28 17:35:33.000000000 +0400
@@ -181,5 +181,7 @@ static inline int compat_timespec_compar
 	return lhs->tv_nsec - rhs->tv_nsec;
 }
 
+extern long compat_nanosleep_restart(struct restart_block *restart);
+
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/console.h linux-2.6.16.46-0.12-027test011/include/linux/console.h
--- linux-2.6.16.46-0.12.orig/include/linux/console.h	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/console.h	2007-08-28 17:35:30.000000000 +0400
@@ -135,4 +135,22 @@ extern int is_console_locked(void);
 #define VESA_HSYNC_SUSPEND      2
 #define VESA_POWERDOWN          3
 
+
+#include <linux/preempt.h>
+#include <linux/cache.h>
+#include <linux/threads.h>
+
+struct printk_aligned {	/* one flag slot per CPU, see printk_no_wake_var[] */
+	int v;			/* flag value read by the printk_no_wake macros */
+} ____cacheline_aligned;	/* each slot aligned to a cache line; presumably to avoid false sharing */
+extern struct printk_aligned printk_no_wake_var[NR_CPUS];
+#define __printk_no_wake (printk_no_wake_var[smp_processor_id()].v) /* raw lvalue for the current CPU's slot; no preempt protection here */
+#define printk_no_wake ({ \
+			int v; \
+			preempt_disable(); /* preemption off while the current CPU's slot is indexed */ \
+			v = __printk_no_wake; \
+			preempt_enable_no_resched(); \
+			v; /* value of the statement expression */ \
+			})
+
 #endif /* _LINUX_CONSOLE_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/cpt_image.h linux-2.6.16.46-0.12-027test011/include/linux/cpt_image.h
--- linux-2.6.16.46-0.12.orig/include/linux/cpt_image.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/cpt_image.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,1621 @@
+/*
+ *
+ *  include/linux/cpt_image.h
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __CPT_IMAGE_H_
+#define __CPT_IMAGE_H_ 1
+
+#define CPT_NULL (~0ULL)
+#define CPT_NOINDEX (~0U)
+
+/*
+ * Image file layout.
+ *
+ * - major header
+ * - sections[]
+ *
+ *	Each section is:
+ *	- section header
+ *	- array of objects
+ *
+ * All data records are arch independent, 64 bit aligned.
+ */
+
+enum _cpt_object_type
+{
+	CPT_OBJ_TASK = 0,
+	CPT_OBJ_MM,
+	CPT_OBJ_FS,
+	CPT_OBJ_FILES,
+	CPT_OBJ_FILE,
+	CPT_OBJ_SIGHAND_STRUCT,
+	CPT_OBJ_SIGNAL_STRUCT,
+	CPT_OBJ_TTY,
+	CPT_OBJ_SOCKET,
+	CPT_OBJ_SYSVSEM_UNDO,
+	CPT_OBJ_NAMESPACE,
+	CPT_OBJ_SYSV_SHM,
+	CPT_OBJ_INODE,
+	CPT_OBJ_UBC,
+	CPT_OBJ_SLM_SGREG,
+	CPT_OBJ_SLM_REGOBJ,
+	CPT_OBJ_SLM_MM,
+	CPT_OBJ_MAX,
+	/* The objects above are stored in memory while checkpointing */
+
+	CPT_OBJ_VMA = 1024,
+	CPT_OBJ_FILEDESC,
+	CPT_OBJ_SIGHANDLER,
+	CPT_OBJ_SIGINFO,
+	CPT_OBJ_LASTSIGINFO,
+	CPT_OBJ_SYSV_SEM,
+	CPT_OBJ_SKB,
+	CPT_OBJ_FLOCK,
+	CPT_OBJ_OPENREQ,
+	CPT_OBJ_VFSMOUNT,
+	CPT_OBJ_TRAILER,
+	CPT_OBJ_SYSVSEM_UNDO_REC,
+	CPT_OBJ_NET_DEVICE,
+	CPT_OBJ_NET_IFADDR,
+	CPT_OBJ_NET_ROUTE,
+	CPT_OBJ_NET_CONNTRACK,
+	CPT_OBJ_NET_CONNTRACK_EXPECT,
+	CPT_OBJ_AIO_CONTEXT,
+	CPT_OBJ_VEINFO,
+	CPT_OBJ_EPOLL,
+	CPT_OBJ_EPOLL_FILE,
+	CPT_OBJ_SKFILTER,
+	CPT_OBJ_SIGALTSTACK,
+  	CPT_OBJ_SOCK_MCADDR,
+	CPT_OBJ_BIND_MNT,
+
+	CPT_OBJ_X86_REGS = 4096,
+	CPT_OBJ_X86_64_REGS,
+	CPT_OBJ_PAGES,
+	CPT_OBJ_COPYPAGES,
+	CPT_OBJ_REMAPPAGES,
+	CPT_OBJ_LAZYPAGES,
+	CPT_OBJ_NAME,
+	CPT_OBJ_BITS,
+	CPT_OBJ_REF,
+	CPT_OBJ_ITERPAGES,
+	CPT_OBJ_ITERYOUNGPAGES,
+	CPT_OBJ_VSYSCALL,
+	CPT_OBJ_IA64_REGS,
+	CPT_OBJ_INOTIFY,
+	CPT_OBJ_INOTIFY_WATCH,
+	CPT_OBJ_INOTIFY_EVENT,
+};
+
+#define CPT_ALIGN(n) (((n)+7)&~7)
+
+struct cpt_major_hdr
+{
+	__u8	cpt_signature[4];	/* Magic number */
+	__u16	cpt_hdrlen;		/* Length of this header */
+	__u16	cpt_image_version;	/* Format of this file */
+#define CPT_VERSION_MINOR(a)	((a) & 0xf)
+#define CPT_VERSION_8		0
+#define CPT_VERSION_9		0x100
+#define CPT_VERSION_9_1		0x101
+#define CPT_VERSION_16		0x200
+#define CPT_VERSION_18		0x300
+	__u16	cpt_os_arch;		/* Architecture */
+#define CPT_OS_ARCH_I386	0
+#define CPT_OS_ARCH_EMT64	1
+#define CPT_OS_ARCH_IA64	2
+	__u16	__cpt_pad1;
+	__u32	cpt_ve_features;	/* VE features */
+	__u32	cpt_ve_features2;	/* VE features */
+	__u16	cpt_pagesize;		/* Page size used by OS */
+	__u16	cpt_hz;			/* HZ used by OS */
+	__u64	cpt_start_jiffies64;	/* Jiffies */
+	__u32	cpt_start_sec;		/* Seconds */
+	__u32	cpt_start_nsec;		/* Nanoseconds */
+	__u32	cpt_cpu_caps[4];	/* CPU capabilities */
+	__u32	cpt_kernel_config[4];	/* Kernel config */
+	__u64	cpt_iptables_mask;	/* Used netfilter modules */
+} __attribute__ ((aligned (8)));
+
+#define CPT_SIGNATURE0 0x79
+#define CPT_SIGNATURE1 0x1c
+#define CPT_SIGNATURE2 0x01
+#define CPT_SIGNATURE3 0x63
+
+/* CPU capabilities */
+#define CPT_CPU_X86_CMOV	0
+#define CPT_CPU_X86_FXSR	1
+#define CPT_CPU_X86_SSE		2
+#define CPT_CPU_X86_SSE2	3
+#define CPT_CPU_X86_MMX		4
+#define CPT_CPU_X86_3DNOW	5
+#define CPT_CPU_X86_3DNOW2	6
+#define CPT_CPU_X86_SEP		7
+#define CPT_CPU_X86_EMT64	8
+#define CPT_CPU_X86_IA64	9
+
+/* Unsupported features */
+#define CPT_EXTERNAL_PROCESS	16
+#define CPT_NAMESPACES		17
+#define CPT_SCHEDULER_POLICY	18
+#define CPT_PTRACED_FROM_VE0	19
+#define CPT_UNSUPPORTED_FSTYPE	20
+#define CPT_BIND_MOUNT		21
+#define CPT_UNSUPPORTED_NETDEV	22
+#define CPT_UNSUPPORTED_MISC	23
+
+/* This mask is used to determine whether VE
+   has some unsupported features or not */
+#define CPT_UNSUPPORTED_MASK	0xffff0000UL
+
+#define CPT_KERNEL_CONFIG_PAE	0
+
+struct cpt_section_hdr
+{
+	__u64	cpt_next;
+	__u32	cpt_section;
+	__u16	cpt_hdrlen;
+	__u16	cpt_align;
+} __attribute__ ((aligned (8)));
+
+enum
+{
+	CPT_SECT_ERROR,			/* Error section, content is string */
+	CPT_SECT_VEINFO,
+	CPT_SECT_FILES,			/* Files. Content is array of file objects */
+	CPT_SECT_TASKS,
+	CPT_SECT_MM,
+	CPT_SECT_FILES_STRUCT,
+	CPT_SECT_FS,
+	CPT_SECT_SIGHAND_STRUCT,
+	CPT_SECT_TTY,
+	CPT_SECT_SOCKET,
+	CPT_SECT_NAMESPACE,
+	CPT_SECT_SYSVSEM_UNDO,
+	CPT_SECT_INODE,			/* Inodes with i->i_nlink==0 and
+					 * deleted dentries with inodes not
+					 * referenced inside dumped process.
+					 */
+	CPT_SECT_SYSV_SHM,
+	CPT_SECT_SYSV_SEM,
+	CPT_SECT_ORPHANS,
+	CPT_SECT_NET_DEVICE,
+	CPT_SECT_NET_IFADDR,
+	CPT_SECT_NET_ROUTE,
+	CPT_SECT_NET_IPTABLES,
+	CPT_SECT_NET_CONNTRACK,
+	CPT_SECT_NET_CONNTRACK_VE0,
+	CPT_SECT_UTSNAME,
+	CPT_SECT_TRAILER,
+	CPT_SECT_UBC,
+	CPT_SECT_SLM_SGREGS,
+	CPT_SECT_SLM_REGOBJS,
+/* Due to silly mistake we cannot index sections beyond this value */
+#define	CPT_SECT_MAX_INDEX	(CPT_SECT_SLM_REGOBJS+1)
+	CPT_SECT_EPOLL,
+	CPT_SECT_VSYSCALL,
+	CPT_SECT_INOTIFY,
+	CPT_SECT_MAX
+};
+
+struct cpt_major_tail
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_lazypages;
+	__u32	cpt_64bit;
+	__u64	cpt_sections[CPT_SECT_MAX_INDEX];
+	__u32	cpt_nsect;
+	__u8	cpt_signature[4];	/* Magic number */
+} __attribute__ ((aligned (8)));
+
+
+/* Common object header. */
+struct cpt_object_hdr
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+} __attribute__ ((aligned (8)));
+
+enum _cpt_content_type {
+	CPT_CONTENT_VOID,
+	CPT_CONTENT_ARRAY,
+	CPT_CONTENT_DATA,
+	CPT_CONTENT_NAME,
+
+	CPT_CONTENT_STACK,
+	CPT_CONTENT_X86_FPUSTATE_OLD,
+	CPT_CONTENT_X86_FPUSTATE,
+	CPT_CONTENT_MM_CONTEXT,
+	CPT_CONTENT_SEMARRAY,
+	CPT_CONTENT_SEMUNDO,
+	CPT_CONTENT_NLMARRAY,
+	CPT_CONTENT_MAX
+};
+
+/* CPT_OBJ_BITS: encode array of bytes */
+struct cpt_obj_bits
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_size;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_REF: a reference to another object */
+struct cpt_obj_ref
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_pos;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_VEINFO: various ve specific data */
+struct cpt_veinfo_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	/* ipc ctls */
+	__u32	shm_ctl_max;
+	__u32	shm_ctl_all;
+	__u32	shm_ctl_mni;
+	__u32	msg_ctl_max;
+	__u32	msg_ctl_mni;
+	__u32	msg_ctl_mnb;
+	__u32	sem_ctl_arr[4];
+
+	/* start time */
+	__u64	start_timespec_delta;
+	__u64	start_jiffies_delta;
+
+	/* later extension */
+	__u32	last_pid;
+	__u32	pad1;
+	__u64	reserved[8];
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_FILE: one struct file */
+struct cpt_file_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_flags;
+	__u32	cpt_mode;
+	__u64	cpt_pos;
+	__u32	cpt_uid;
+	__u32	cpt_gid;
+
+	__u32	cpt_i_mode;
+	__u32	cpt_lflags;
+#define CPT_DENTRY_DELETED	1
+#define CPT_DENTRY_ROOT		2
+#define CPT_DENTRY_CLONING	4
+#define CPT_DENTRY_PROC		8
+#define CPT_DENTRY_EPOLL	0x10
+#define CPT_DENTRY_REPLACED	0x20
+#define CPT_DENTRY_INOTIFY	0x40
+#define CPT_DENTRY_FUTEX	0x80
+	__u64	cpt_inode;
+	__u64	cpt_priv;
+
+	__u32	cpt_fown_fd;
+	__u32	cpt_fown_pid;
+	__u32	cpt_fown_uid;
+	__u32	cpt_fown_euid;
+	__u32	cpt_fown_signo;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by file name, encoded as CPT_OBJ_NAME */
+
+struct cpt_epoll_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+} __attribute__ ((aligned (8)));
+/* Followed by array of struct cpt_epoll_file_image */
+
+struct cpt_epoll_file_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_fd;
+	__u32	cpt_events;
+	__u64	cpt_data;
+	__u32	cpt_revents;
+	__u32	cpt_ready;
+} __attribute__ ((aligned (8)));
+
+struct cpt_inotify_wd_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_wd;
+	__u32	cpt_mask;
+} __attribute__ ((aligned (8)));
+/* Followed by cpt_file_image of inode to watch */
+
+struct cpt_inotify_ev_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_wd;
+	__u32	cpt_mask;
+	__u32	cpt_cookie;
+	__u32	cpt_namelen;
+} __attribute__ ((aligned (8)));
+/* Followed by name */
+
+struct cpt_inotify_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_user;
+	__u32	cpt_max_events;
+	__u32	cpt_last_wd;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by array of struct cpt_inotify_wd_image and cpt_inotify_ev_image */
+
+
+/* CPT_OBJ_FILEDESC: one file descriptor */
+struct cpt_fd_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_fd;
+	__u32	cpt_flags;
+#define CPT_FD_FLAG_CLOSEEXEC	1
+	__u64	cpt_file;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_FILES: one files_struct */
+struct cpt_files_struct_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u32	cpt_max_fds;
+	__u32	cpt_next_fd;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by array of cpt_fd_image */
+
+/* CPT_OBJ_FS: one fs_struct */
+struct cpt_fs_struct_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_umask;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+/* Followed by two/three CPT_OBJ_FILENAME for root, pwd and, optionally, altroot */
+
+/* CPT_OBJ_INODE: one struct inode */
+struct cpt_inode_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_dev;
+	__u64	cpt_ino;
+	__u32	cpt_mode;
+	__u32	cpt_nlink;
+	__u32	cpt_uid;
+	__u32	cpt_gid;
+	__u64	cpt_rdev;
+	__u64	cpt_size;
+	__u64	cpt_blksize;
+	__u64	cpt_atime;
+	__u64	cpt_mtime;
+	__u64	cpt_ctime;
+	__u64	cpt_blocks;
+	__u32	cpt_sb;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+/* CPT_OBJ_VFSMOUNT: one vfsmount */
+struct cpt_vfsmount_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_mntflags;
+#define CPT_MNT_BIND	0x80000000
+#define CPT_MNT_EXT	0x40000000
+	__u32	cpt_flags;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_flock_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_owner;
+	__u32	cpt_pid;
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u32	cpt_flags;
+	__u32	cpt_type;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_tty_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_flags;
+	__u32	cpt_link;
+	__u32	cpt_index;
+	__u32	cpt_drv_type;
+	__u32	cpt_drv_subtype;
+	__u32	cpt_drv_flags;
+	__u8	cpt_packet;
+	__u8	cpt_stopped;
+	__u8	cpt_hw_stopped;
+	__u8	cpt_flow_stopped;
+
+	__u32	cpt_canon_data;
+	__u32	cpt_canon_head;
+	__u32	cpt_canon_column;
+	__u32	cpt_column;
+	__u8	cpt_ctrl_status;
+	__u8	cpt_erasing;
+	__u8	cpt_lnext;
+	__u8	cpt_icanon;
+	__u8	cpt_raw;
+	__u8	cpt_real_raw;
+	__u8	cpt_closing;
+	__u8	__cpt_pad1;
+	__u16	cpt_minimum_to_wake;
+	__u16	__cpt_pad2;
+	__u32	cpt_pgrp;
+	__u32	cpt_session;
+	__u32	cpt_c_line;
+	__u8	cpt_name[64];
+	__u16	cpt_ws_row;
+	__u16	cpt_ws_col;
+	__u16	cpt_ws_prow;
+	__u16	cpt_ws_pcol;
+	__u8	cpt_c_cc[32];
+	__u32	cpt_c_iflag;
+	__u32	cpt_c_oflag;
+	__u32	cpt_c_cflag;
+	__u32	cpt_c_lflag;
+	__u32	cpt_read_flags[4096/32];
+} __attribute__ ((aligned (8)));
+
+struct cpt_sock_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_parent;
+	__u32	cpt_index;
+
+	__u64	cpt_ssflags;
+	__u16	cpt_type;
+	__u16	cpt_family;
+	__u8	cpt_sstate;
+	__u8	cpt_passcred;
+	__u8	cpt_state;
+	__u8	cpt_reuse;
+
+	__u8	cpt_zapped;
+	__u8	cpt_shutdown;
+	__u8	cpt_userlocks;
+	__u8	cpt_no_check;
+	__u8	cpt_debug;
+	__u8	cpt_rcvtstamp;
+	__u8	cpt_localroute;
+	__u8	cpt_protocol;
+
+	__u32	cpt_err;
+	__u32	cpt_err_soft;
+
+	__u16	cpt_max_ack_backlog;
+	__u16   __cpt_pad1;
+	__u32	cpt_priority;
+
+	__u32	cpt_rcvlowat;
+	__u32	cpt_bound_dev_if;
+
+	__u64	cpt_rcvtimeo;
+	__u64	cpt_sndtimeo;
+	__u32	cpt_rcvbuf;
+	__u32	cpt_sndbuf;
+	__u64	cpt_flags;
+	__u64	cpt_lingertime;
+	__u32	cpt_peer_pid;
+	__u32	cpt_peer_uid;
+
+	__u32	cpt_peer_gid;
+	__u32	cpt_laddrlen;
+	__u32	cpt_laddr[128/4];
+	__u32	cpt_raddrlen;
+	__u32	cpt_raddr[128/4];
+	/* AF_UNIX */
+	__u32	cpt_peer;
+
+	__u8	cpt_socketpair;
+	__u8	cpt_deleted;
+	__u16	__cpt_pad4;
+	__u32	__cpt_pad5;
+/*
+	struct sk_filter      	*sk_filter;
+ */
+
+	__u64			cpt_stamp;
+	__u32			cpt_daddr;
+	__u16			cpt_dport;
+	__u16			cpt_sport;
+
+	__u32			cpt_saddr;
+	__u32			cpt_rcv_saddr;
+
+	__u32			cpt_uc_ttl;
+	__u32			cpt_tos;
+
+	__u32			cpt_cmsg_flags;
+	__u32			cpt_mc_index;
+
+	__u32			cpt_mc_addr;
+/*
+	struct ip_options	*opt;
+ */
+	__u8			cpt_hdrincl;
+	__u8			cpt_mc_ttl;
+	__u8			cpt_mc_loop;
+	__u8			cpt_pmtudisc;
+
+	__u8			cpt_recverr;
+	__u8			cpt_freebind;
+	__u16			cpt_idcounter;
+	__u32			cpt_cork_flags;
+
+	__u32			cpt_cork_fragsize;
+	__u32			cpt_cork_length;
+	__u32			cpt_cork_addr;
+	__u32			cpt_cork_saddr;
+	__u32			cpt_cork_daddr;
+	__u32			cpt_cork_oif;
+
+	__u32			cpt_udp_pending;
+	__u32			cpt_udp_corkflag;
+	__u16			cpt_udp_encap;
+	__u16			cpt_udp_len;
+	__u32			__cpt_pad7;
+
+	__u64			cpt_saddr6[2];
+	__u64			cpt_rcv_saddr6[2];
+	__u64			cpt_daddr6[2];
+	__u32			cpt_flow_label6;
+	__u32			cpt_frag_size6;
+	__u32			cpt_hop_limit6;
+	__u32			cpt_mcast_hops6;
+
+	__u32			cpt_mcast_oif6;
+	__u8			cpt_rxopt6;
+	__u8			cpt_mc_loop6;
+	__u8			cpt_recverr6;
+	__u8			cpt_sndflow6;
+
+	__u8			cpt_pmtudisc6;
+	__u8			cpt_ipv6only6;
+	__u8			cpt_mapped;
+	__u8			__cpt_pad8;
+	__u32	cpt_pred_flags;
+
+	__u32	cpt_rcv_nxt;
+	__u32	cpt_snd_nxt;
+
+	__u32	cpt_snd_una;
+	__u32	cpt_snd_sml;
+
+	__u32	cpt_rcv_tstamp;
+	__u32	cpt_lsndtime;
+
+	__u8	cpt_tcp_header_len;
+	__u8	cpt_ack_pending;
+	__u8	cpt_quick;
+	__u8	cpt_pingpong;
+	__u8	cpt_blocked;
+	__u8	__cpt_pad9;
+	__u16	__cpt_pad10;
+
+	__u32	cpt_ato;
+	__u32	cpt_ack_timeout;
+
+	__u32	cpt_lrcvtime;
+	__u16	cpt_last_seg_size;
+	__u16	cpt_rcv_mss;
+
+	__u32	cpt_snd_wl1;
+	__u32	cpt_snd_wnd;
+
+	__u32	cpt_max_window;
+	__u32	cpt_pmtu_cookie;
+
+	__u32	cpt_mss_cache;
+	__u16	cpt_mss_cache_std;
+	__u16	cpt_mss_clamp;
+
+	__u16	cpt_ext_header_len;
+	__u16	cpt_ext2_header_len;
+	__u8	cpt_ca_state;
+	__u8	cpt_retransmits;
+	__u8	cpt_reordering;
+	__u8	cpt_frto_counter;
+
+	__u32	cpt_frto_highmark;
+	__u8	cpt_adv_cong;
+	__u8	cpt_defer_accept;
+	__u8	cpt_backoff;
+	__u8	__cpt_pad11;
+
+	__u32	cpt_srtt;
+	__u32	cpt_mdev;
+
+	__u32	cpt_mdev_max;
+	__u32	cpt_rttvar;
+
+	__u32	cpt_rtt_seq;
+	__u32	cpt_rto;
+
+	__u32	cpt_packets_out;
+	__u32	cpt_left_out;
+
+	__u32	cpt_retrans_out;
+ 	__u32	cpt_snd_ssthresh;
+
+ 	__u32	cpt_snd_cwnd;
+ 	__u16	cpt_snd_cwnd_cnt;
+	__u16	cpt_snd_cwnd_clamp;
+
+	__u32	cpt_snd_cwnd_used;
+	__u32	cpt_snd_cwnd_stamp;
+
+	__u32	cpt_timeout;
+	__u32	cpt_ka_timeout;
+
+ 	__u32	cpt_rcv_wnd;
+	__u32	cpt_rcv_wup;
+
+	__u32	cpt_write_seq;
+	__u32	cpt_pushed_seq;
+
+	__u32	cpt_copied_seq;
+	__u8	cpt_tstamp_ok;
+	__u8	cpt_wscale_ok;
+	__u8	cpt_sack_ok;
+	__u8	cpt_saw_tstamp;
+
+        __u8	cpt_snd_wscale;
+        __u8	cpt_rcv_wscale;
+	__u8	cpt_nonagle;
+	__u8	cpt_keepalive_probes;
+        __u32	cpt_rcv_tsval;
+
+        __u32	cpt_rcv_tsecr;
+        __u32	cpt_ts_recent;
+
+	__u64	cpt_ts_recent_stamp;
+	__u16	cpt_user_mss;
+	__u8	cpt_dsack;
+	__u8	cpt_eff_sacks;
+	__u32	cpt_sack_array[2*5];
+	__u32	cpt_window_clamp;
+
+	__u32	cpt_rcv_ssthresh;
+	__u8	cpt_probes_out;
+	__u8	cpt_num_sacks;
+	__u16	cpt_advmss;
+
+	__u8	cpt_syn_retries;
+	__u8	cpt_ecn_flags;
+	__u16	cpt_prior_ssthresh;
+	__u32	cpt_lost_out;
+
+	__u32   cpt_sacked_out;
+	__u32   cpt_fackets_out;
+
+	__u32   cpt_high_seq;
+	__u32	cpt_retrans_stamp;
+
+	__u32	cpt_undo_marker;
+	__u32	cpt_undo_retrans;
+
+	__u32	cpt_urg_seq;
+	__u16	cpt_urg_data;
+	__u8	cpt_pending;
+	__u8	cpt_urg_mode;
+
+	__u32	cpt_snd_up;
+	__u32	cpt_keepalive_time;
+
+	__u32   cpt_keepalive_intvl;
+	__u32   cpt_linger2;
+
+	__u32	cpt_rcvrtt_rtt;
+	__u32	cpt_rcvrtt_seq;
+
+	__u32	cpt_rcvrtt_time;
+	__u32	__cpt_pad12;
+} __attribute__ ((aligned (8)));
+
+struct cpt_sockmc_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u16	cpt_family;
+	__u16	cpt_mode;
+	__u32	cpt_ifindex;
+	__u32	cpt_mcaddr[4];
+} __attribute__ ((aligned (8)));
+/* Followed by array of source addresses, each zero padded to 16 bytes */
+
+struct cpt_openreq_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_rcv_isn;
+	__u32	cpt_snt_isn;
+
+	__u16	cpt_rmt_port;
+	__u16	cpt_mss;
+	__u8	cpt_family;
+	__u8	cpt_retrans;
+	__u8	cpt_snd_wscale;
+	__u8	cpt_rcv_wscale;
+
+	__u8	cpt_tstamp_ok;
+	__u8	cpt_sack_ok;
+	__u8	cpt_wscale_ok;
+	__u8	cpt_ecn_ok;
+	__u8	cpt_acked;
+	__u8	__cpt_pad1;
+	__u16	__cpt_pad2;
+
+	__u32	cpt_window_clamp;
+	__u32	cpt_rcv_wnd;
+	__u32	cpt_ts_recent;
+	__u32	cpt_iif;
+	__u64	cpt_expires;
+
+	__u64	cpt_loc_addr[2];
+	__u64	cpt_rmt_addr[2];
+/*
+	struct ip_options	*opt;
+ */
+
+} __attribute__ ((aligned (8)));
+
+struct cpt_skb_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_owner;
+	__u32	cpt_queue;
+#define CPT_SKB_NQ	0
+#define CPT_SKB_RQ	1
+#define CPT_SKB_WQ	2
+#define CPT_SKB_OFOQ	3
+
+	__u64	cpt_stamp;
+	__u32	cpt_len;
+	__u32	cpt_hspace;
+	__u32	cpt_tspace;
+	__u32	cpt_h;
+	__u32	cpt_nh;
+	__u32	cpt_mac;
+
+	__u64	cpt_cb[5];
+	__u32	cpt_mac_len;
+	__u32	cpt_csum;
+	__u8	cpt_local_df;
+	__u8	cpt_pkt_type;
+	__u8	cpt_ip_summed;
+	__u8	__cpt_pad1;
+	__u32	cpt_priority;
+	__u16	cpt_protocol;
+	__u16	cpt_security;
+	__u16	cpt_gso_segs;
+	__u16	cpt_gso_size;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_sysvshm_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_key;
+	__u64	cpt_uid;
+	__u64	cpt_gid;
+	__u64	cpt_cuid;
+	__u64	cpt_cgid;
+	__u64	cpt_mode;
+	__u64	cpt_seq;
+
+	__u32	cpt_id;
+	__u32	cpt_mlockuser;
+	__u64	cpt_segsz;
+	__u64	cpt_atime;
+	__u64	cpt_ctime;
+	__u64	cpt_dtime;
+	__u64	cpt_creator;
+	__u64	cpt_last;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_sysvsem_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_key;
+	__u64	cpt_uid;
+	__u64	cpt_gid;
+	__u64	cpt_cuid;
+	__u64	cpt_cgid;
+	__u64	cpt_mode;
+	__u64	cpt_seq;
+	__u32	cpt_id;
+	__u32	__cpt_pad1;
+
+	__u64	cpt_otime;
+	__u64	cpt_ctime;
+} __attribute__ ((aligned (8)));
+/* Content is array of pairs semval/sempid */
+
+struct cpt_sysvsem_undo_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_id;
+	__u32	cpt_nsem;
+} __attribute__ ((aligned (8)));
+
+
+struct cpt_mm_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start_code;
+	__u64	cpt_end_code;
+	__u64	cpt_start_data;
+	__u64	cpt_end_data;
+	__u64	cpt_start_brk;
+	__u64	cpt_brk;
+	__u64	cpt_start_stack;
+	__u64	cpt_start_arg;
+	__u64	cpt_end_arg;
+	__u64	cpt_start_env;
+	__u64	cpt_end_env;
+	__u64	cpt_def_flags;
+	__u64	cpt_mmub;
+	__u8	cpt_dumpable;
+	__u8	cpt_vps_dumpable;
+	__u8	cpt_used_hugetlb;
+	__u8	__cpt_pad;
+} __attribute__ ((aligned (8)));
+
+struct cpt_page_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+} __attribute__ ((aligned (8)));
+
+struct cpt_remappage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_pgoff;
+} __attribute__ ((aligned (8)));
+
+struct cpt_copypage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_source;
+} __attribute__ ((aligned (8)));
+
+struct cpt_lazypage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_index;
+} __attribute__ ((aligned (8)));
+
+struct cpt_iterpage_block
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+} __attribute__ ((aligned (8)));
+/* Followed by array of PFNs */
+
+struct cpt_vma_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_file;
+	__u32	cpt_type;
+#define CPT_VMA_TYPE_0		0
+#define CPT_VMA_TYPE_SHM	1
+	__u32	cpt_anonvma;
+	__u64	cpt_anonvmaid;
+
+	__u64	cpt_start;
+	__u64	cpt_end;
+	__u64	cpt_flags;
+	__u64	cpt_pgprot;
+	__u64	cpt_pgoff;
+} __attribute__ ((aligned (8)));
+
+struct cpt_aio_ctx_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_max_reqs;
+	__u32	cpt_ring_pages;
+	__u32	cpt_tail;
+	__u32	cpt_nr;
+	__u64	cpt_mmap_base;
+	/* Data (io_event's) and struct aio_ring are stored in user space VM */
+} __attribute__ ((aligned (8)));
+
+
+/* Format of MM section.
+ *
+ * It is array of MM objects (mm_struct). Each MM object is
+ * header, encoding mm_struct, followed by array of VMA objects.
+ * Each VMA consists of VMA header, encoding vm_area_struct, and
+ * if the VMA contains copied pages, the header is followed by
+ * array of tuples start-end each followed by data.
+ *
+ * ATTN: no block/page alignment. Only 64bit alignment. This might be not good?
+ */
+
+struct cpt_restart_block {	/* embedded in struct cpt_task_image as cpt_restart */
+	__u64	fn;		/* presumably one of the CPT_RBL_* codes below - confirm against the dump code */
+#define CPT_RBL_0			0
+#define CPT_RBL_NANOSLEEP		1
+#define CPT_RBL_COMPAT_NANOSLEEP	2
+	__u64	arg0;		/* arg0..arg3: four 64-bit argument slots stored alongside fn */
+	__u64	arg1;
+	__u64	arg2;
+	__u64	arg3;
+} __attribute__ ((aligned (8)));
+
+struct cpt_siginfo_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_qflags;
+	__u32	cpt_signo;
+	__u32	cpt_errno;
+	__u32	cpt_code;
+
+	__u64	cpt_sigval;
+	__u32	cpt_pid;
+	__u32	cpt_uid;
+	__u64	cpt_utime;
+	__u64	cpt_stime;
+
+	__u64	cpt_user;
+} __attribute__ ((aligned (8)));
+
+/* Portable representations of segment registers */
+
+#define CPT_SEG_ZERO		0
+#define CPT_SEG_TLS1		1
+#define CPT_SEG_TLS2		2
+#define CPT_SEG_TLS3		3
+#define CPT_SEG_USER32_DS	4
+#define CPT_SEG_USER32_CS	5
+#define CPT_SEG_USER64_DS	6
+#define CPT_SEG_USER64_CS	7
+#define CPT_SEG_LDT		256
+
+struct cpt_x86_regs
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+	/* 32-bit x86 register values; the four fields above mirror struct cpt_object_hdr */
+	__u32	cpt_debugreg[8];
+	__u32	cpt_fs;
+	__u32	cpt_gs;
+
+	__u32	cpt_ebx;
+	__u32	cpt_ecx;
+	__u32	cpt_edx;
+	__u32	cpt_esi;
+	__u32	cpt_edi;
+	__u32	cpt_ebp;
+	__u32	cpt_eax;
+	__u32	cpt_xds;
+	__u32	cpt_xes;
+	__u32	cpt_orig_eax;
+	__u32	cpt_eip;
+	__u32	cpt_xcs;
+	__u32	cpt_eflags;
+	__u32	cpt_esp;
+	__u32	cpt_xss;
+	__u32	cpt_pad;	/* brings the record to 120 bytes, a multiple of 8 */
+} __attribute__ ((aligned (8)));	/* same 64-bit alignment as every other image record; size unchanged */
+
+struct cpt_x86_64_regs
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+	/* x86-64 register values; the four fields above mirror struct cpt_object_hdr */
+	__u64	cpt_debugreg[8];
+
+	__u64	cpt_fsbase;
+	__u64	cpt_gsbase;
+	__u32	cpt_fsindex;
+	__u32	cpt_gsindex;
+	__u32	cpt_ds;
+	__u32	cpt_es;
+
+	__u64	cpt_r15;
+	__u64	cpt_r14;
+	__u64	cpt_r13;
+	__u64	cpt_r12;
+	__u64	cpt_rbp;
+	__u64	cpt_rbx;
+	__u64	cpt_r11;
+	__u64	cpt_r10;
+	__u64	cpt_r9;
+	__u64	cpt_r8;
+	__u64	cpt_rax;
+	__u64	cpt_rcx;
+	__u64	cpt_rdx;
+	__u64	cpt_rsi;
+	__u64	cpt_rdi;
+	__u64	cpt_orig_rax;
+	__u64	cpt_rip;
+	__u64	cpt_cs;
+	__u64	cpt_eflags;
+	__u64	cpt_rsp;
+	__u64	cpt_ss;
+} __attribute__ ((aligned (8)));	/* same 64-bit alignment as every other image record; 280 bytes, size unchanged */
+
+struct cpt_ia64_regs
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+	/* ia64 register values, all 64-bit; the four fields above mirror struct cpt_object_hdr */
+	__u64	gr[128];
+	__u64	fr[256];
+	__u64	br[8];
+	__u64	nat[2];
+
+	__u64	ar_bspstore;
+	__u64	num_regs;
+	__u64	loadrs;
+	__u64	ar_bsp;
+	__u64	ar_unat;
+	__u64	ar_pfs;
+	__u64	ar_ccv;
+	__u64	ar_fpsr;
+	__u64	ar_csd;
+	__u64	ar_ssd;
+	__u64	ar_ec;
+	__u64	ar_lc;
+	__u64	ar_rsc;
+	__u64	ar_rnat;
+
+	__u64	cr_iip;
+	__u64	cr_ipsr;
+
+	__u64	cfm;
+	__u64	pr;
+
+	__u64	ibr[8];
+	__u64	dbr[8];
+} __attribute__ ((aligned (8)));	/* same 64-bit alignment as every other image record; size unchanged */
+
+
+struct cpt_task_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_state;
+	__u64	cpt_flags;
+	__u64	cpt_ptrace;
+	__u32	cpt_prio;
+	__u32	cpt_static_prio;
+	__u32	cpt_policy;
+	__u32	cpt_rt_priority;
+
+	/* struct thread_info */
+	__u64	cpt_exec_domain;
+	__u64	cpt_thrflags;
+	__u64	cpt_thrstatus;
+	__u64	cpt_addr_limit;
+
+	__u64	cpt_personality;
+
+	__u64	cpt_mm;
+	__u64	cpt_files;
+	__u64	cpt_fs;
+	__u64	cpt_signal;
+	__u64	cpt_sighand;
+	__u64	cpt_sigblocked;
+	__u64	cpt_sigrblocked;
+	__u64	cpt_sigpending;
+	__u64	cpt_namespace;
+	__u64	cpt_sysvsem_undo;
+	__u32	cpt_pid;
+	__u32	cpt_tgid;
+	__u32	cpt_ppid;
+	__u32	cpt_rppid;
+	__u32	cpt_pgrp;
+	__u32	cpt_session;
+	__u32	cpt_old_pgrp;
+	__u32	__cpt_pad;
+	__u32	cpt_leader;
+	__u8	cpt_pn_state;
+	__u8	cpt_stopped_state;
+	__u8	cpt_sigsuspend_state;
+	__u8	cpt_64bit;
+	__u64	cpt_set_tid;
+	__u64	cpt_clear_tid;
+	__u32	cpt_exit_code;
+	__u32	cpt_exit_signal;
+	__u32	cpt_pdeath_signal;
+	__u32	cpt_user;
+	__u32	cpt_uid;
+	__u32	cpt_euid;
+	__u32	cpt_suid;
+	__u32	cpt_fsuid;
+	__u32	cpt_gid;
+	__u32	cpt_egid;
+	__u32	cpt_sgid;
+	__u32	cpt_fsgid;
+	__u32	cpt_ngids;
+	__u32	cpt_gids[32];
+	__u8	cpt_prctl_uac;
+	__u8	cpt_prctl_fpemu;
+	__u16	__cpt_pad1;
+	__u64	cpt_ecap;
+	__u64	cpt_icap;
+	__u64	cpt_pcap;
+	__u8	cpt_comm[16];
+	__u64	cpt_tls[3];
+	struct cpt_restart_block cpt_restart;
+	__u64	cpt_it_real_value;	/* V8: jiffies, V9..: nsec */
+	__u64	cpt_it_real_incr;	/* V8: jiffies, V9..: nsec */
+	__u64	cpt_it_prof_value;
+	__u64	cpt_it_prof_incr;
+	__u64	cpt_it_virt_value;
+	__u64	cpt_it_virt_incr;
+
+	__u16	cpt_used_math;
+	__u8	cpt_keepcap;
+	__u8	cpt_did_exec;
+	__u32	cpt_ptrace_message;
+
+	__u64	cpt_utime;
+	__u64	cpt_stime;
+	__u64	cpt_starttime;		/* V8: jiffies, V9...: timespec */
+	__u64	cpt_nvcsw;
+	__u64	cpt_nivcsw;
+	__u64	cpt_min_flt;
+	__u64	cpt_maj_flt;
+
+	__u64	cpt_sigsuspend_blocked;
+	__u64	cpt_cutime, cpt_cstime;
+	__u64	cpt_cnvcsw, cpt_cnivcsw;
+	__u64	cpt_cmin_flt, cpt_cmaj_flt;
+
+#define CPT_RLIM_NLIMITS 16
+	__u64	cpt_rlim_cur[CPT_RLIM_NLIMITS];
+	__u64	cpt_rlim_max[CPT_RLIM_NLIMITS];
+
+	__u64	cpt_task_ub;
+	__u64	cpt_exec_ub;
+	__u64	cpt_mm_ub;
+	__u64	cpt_fork_sub;
+} __attribute__ ((aligned (8)));
+
+struct cpt_sigaltstack_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_stack;
+	__u32	cpt_stacksize;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+struct cpt_signal_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_leader;
+	__u8	cpt_pgrp_type;
+	__u8	cpt_old_pgrp_type;
+	__u8	cpt_session_type;
+#define CPT_PGRP_NORMAL		0
+#define CPT_PGRP_ORPHAN		1
+#define CPT_PGRP_STRAY		2
+	__u8	__cpt_pad1;
+	__u64	cpt_pgrp;
+	__u64	cpt_old_pgrp;
+	__u64	cpt_session;
+	__u64	cpt_sigpending;
+	__u64	cpt_ctty;
+
+	__u32	cpt_curr_target;
+	__u32	cpt_group_exit;
+	__u32	cpt_group_exit_code;
+	__u32	cpt_group_exit_task;
+	__u32	cpt_notify_count;
+	__u32	cpt_group_stop_count;
+	__u32	cpt_stop_state;
+	__u32	__cpt_pad2;
+
+	__u64	cpt_utime, cpt_stime, cpt_cutime, cpt_cstime;
+	__u64	cpt_nvcsw, cpt_nivcsw, cpt_cnvcsw, cpt_cnivcsw;
+	__u64	cpt_min_flt, cpt_maj_flt, cpt_cmin_flt, cpt_cmaj_flt;
+
+	__u64	cpt_rlim_cur[CPT_RLIM_NLIMITS];
+	__u64	cpt_rlim_max[CPT_RLIM_NLIMITS];
+} __attribute__ ((aligned (8)));
+/* Followed by list of posix timers. */
+
+struct cpt_sighand_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+} __attribute__ ((aligned (8)));
+/* Followed by list of sighandlers. */
+
+struct cpt_sighandler_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_signo;
+	__u32	__cpt_pad1;
+	__u64	cpt_handler;
+	__u64	cpt_restorer;
+	__u64	cpt_flags;
+	__u64	cpt_mask;
+} __attribute__ ((aligned (8)));
+
+struct cpt_netdev_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u32	cpt_flags;
+	__u8	cpt_name[16];
+} __attribute__ ((aligned (8)));
+
+struct cpt_ifaddr_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u32	cpt_index;
+	__u8	cpt_family;
+	__u8	cpt_masklen;
+	__u8	cpt_flags;
+	__u8	cpt_scope;
+	__u32	cpt_address[4];
+	__u32	cpt_peer[4];
+	__u32	cpt_broadcast[4];
+	__u8	cpt_label[16];
+} __attribute__ ((aligned (8)));
+
+struct cpt_ipct_tuple
+{
+	__u32	cpt_src;
+	__u16	cpt_srcport;
+	__u16	__cpt_pad1;
+
+	__u32	cpt_dst;
+	__u16	cpt_dstport;
+	__u8	cpt_protonum;
+	__u8	cpt_dir;	/* TEMPORARY HACK TO VALIDATE CODE */
+} __attribute__ ((aligned (8)));
+
+struct cpt_nat_manip
+{
+	__u8	cpt_direction;
+	__u8	cpt_hooknum;
+	__u8	cpt_maniptype;
+	__u8	__cpt_pad1;
+
+	__u32	cpt_manip_addr;
+	__u16	cpt_manip_port;
+	__u16	__cpt_pad2;
+	__u32	__cpt_pad3;
+} __attribute__ ((aligned (8)));
+
+struct cpt_nat_seq
+{
+	__u32	cpt_correction_pos;
+	__u32	cpt_offset_before;
+	__u32	cpt_offset_after;
+	__u32	__cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ip_connexpect_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_timeout;
+	__u32	cpt_sibling_conntrack;	/* Index of child conntrack */
+	__u32	cpt_seq;		/* id in 2.6.15 */
+
+	struct cpt_ipct_tuple	cpt_ct_tuple;	/* NU 2.6.15 */
+	struct cpt_ipct_tuple	cpt_tuple;
+	struct cpt_ipct_tuple	cpt_mask;
+
+	/* union ip_conntrack_expect_help. Used by ftp, irc, amanda */
+	__u32	cpt_help[3];			/* NU 2.6.15 */
+	__u16	cpt_manip_proto;
+	__u8	cpt_dir;
+	__u8	cpt_flags;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ip_conntrack_image
+{
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	struct cpt_ipct_tuple cpt_tuple[2];
+	__u64	cpt_status;
+	__u64	cpt_timeout;
+	__u32	cpt_index;
+	__u8	cpt_ct_helper;
+	__u8	cpt_nat_helper;
+	__u16	cpt_pad1;
+
+	/* union ip_conntrack_proto. Used by tcp and icmp. */
+	__u32	cpt_proto_data[12];
+
+	/* union ip_conntrack_help. Used by ftp and pptp helper.
+	 * We do not support pptp...
+	 */
+	__u32	cpt_help_data[6];
+
+	/* nat info */
+	__u32	cpt_initialized;	/* NU 2.6.15 */
+	__u32	cpt_num_manips;		/* NU 2.6.15 */
+	struct  cpt_nat_manip	cpt_nat_manips[6];	/* NU 2.6.15 */
+
+	struct	cpt_nat_seq	cpt_nat_seq[2];
+
+	__u32	cpt_masq_index;
+	__u32	cpt_id;
+	__u32	cpt_mark;
+} __attribute__ ((aligned (8)));
+
+struct cpt_ubparm
+{
+	__u64	barrier;
+	__u64	limit;
+	__u64	held;
+	__u64	maxheld;
+	__u64	minheld;
+	__u64	failcnt;
+} __attribute__ ((aligned (8)));
+
+struct cpt_beancounter_image {
+	__u64	cpt_next;
+	__u32	cpt_object;
+	__u16	cpt_hdrlen;
+	__u16	cpt_content;
+
+	__u64	cpt_parent;
+	__u32	cpt_id;
+	__u32	__cpt_pad;
+	struct	cpt_ubparm	cpt_parms[32 * 2];
+} __attribute__ ((aligned (8)));
+
+struct cpt_slm_sgreg_image {
+	__u64   cpt_next;
+	__u32   cpt_object;
+	__u16   cpt_hdrlen;
+	__u16   cpt_content;
+
+	__u32   cpt_size;
+	__u32   __cpt_pad1;
+	__u32   cpt_id;
+	__u16   cpt_resource;
+	__u8    cpt_regname[32];
+	__u8	__cpt_pad2[2];
+} __attribute__ ((aligned (8)));
+
+struct cpt_slm_obj_image {      
+	__u64   cpt_next;
+	__u32   cpt_object;
+	__u16   cpt_hdrlen;
+	__u16   cpt_content;
+
+	__u32   cpt_size;
+	__u32   __cpt_pad1;
+} __attribute__ ((aligned (8)));
+
+#ifdef __KERNEL__
+
+static inline void *cpt_ptr_import(__u64 ptr)
+{	/* Turn a 64-bit image value back into a native pointer. */
+	return (void*)(unsigned long)ptr;	/* goes through unsigned long: upper bits are dropped where long is 32-bit */
+}
+
+static inline __u64 cpt_ptr_export(void __user *ptr)
+{	/* Widen a user-space pointer to the 64-bit form used in image records. */
+	return (__u64)(unsigned long)ptr;	/* via unsigned long, so the value is zero-extended on 32-bit */
+}
+
+static inline void cpt_sigset_import(sigset_t *sig, __u64 ptr)
+{	/* Fill *sig from the 64-bit signal mask value 'ptr' (a mask, despite the name). */
+	memcpy(sig, &ptr, sizeof(*sig));	/* NOTE(review): copies sizeof(sigset_t) bytes out of an 8-byte source - assumes sizeof(sigset_t) <= 8, confirm per arch */
+}
+
+static inline __u64 cpt_sigset_export(sigset_t *sig)
+{	/* Return the signal mask *sig as one 64-bit value for the image. */
+	return *(__u64*)sig;	/* NOTE(review): type-punned 8-byte read - assumes sizeof(sigset_t) >= 8; the import side uses memcpy instead */
+}
+
+static inline __u64 cpt_timespec_export(struct timespec *tv)
+{	/* Pack a timespec into 64 bits: tv_sec in the upper 32 bits, tv_nsec added into the lower 32. */
+	return (((u64)tv->tv_sec) << 32) + tv->tv_nsec;	/* NOTE(review): only the low 32 bits of tv_sec survive; assumes tv_nsec is non-negative */
+}
+
+static inline void cpt_timespec_import(struct timespec *tv, __u64 val)
+{	/* Unpack a value produced by cpt_timespec_export() into *tv. */
+	tv->tv_sec = val>>32;		/* upper 32 bits, taken as unsigned */
+	tv->tv_nsec = (val&0xFFFFFFFF);	/* lower 32 bits */
+}
+
+static inline __u64 cpt_timeval_export(struct timeval *tv)
+{	/* Pack a timeval into 64 bits: tv_sec in the upper 32 bits, tv_usec added into the lower 32. */
+	return (((u64)tv->tv_sec) << 32) + tv->tv_usec;	/* NOTE(review): only the low 32 bits of tv_sec survive; assumes tv_usec is non-negative */
+}
+
+static inline void cpt_timeval_import(struct timeval *tv, __u64 val)
+{	/* Unpack a value produced by cpt_timeval_export() into *tv. */
+	tv->tv_sec = val>>32;		/* upper 32 bits, taken as unsigned */
+	tv->tv_usec = (val&0xFFFFFFFF);	/* lower 32 bits */
+}
+
+#endif
+
+#endif /* __CPT_IMAGE_H_ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/cpt_ioctl.h linux-2.6.16.46-0.12-027test011/include/linux/cpt_ioctl.h
--- linux-2.6.16.46-0.12.orig/include/linux/cpt_ioctl.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/cpt_ioctl.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,43 @@
+/*
+ *
+ *  include/linux/cpt_ioctl.h
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _CPT_IOCTL_H_
+#define _CPT_IOCTL_H_ 1
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define CPTCTLTYPE '-'	/* first (type) argument of every _IO/_IOW request below */
+#define CPT_SET_DUMPFD	_IOW(CPTCTLTYPE, 1, int)
+#define CPT_SET_STATUSFD _IOW(CPTCTLTYPE, 2, int)
+#define CPT_SET_LOCKFD	_IOW(CPTCTLTYPE, 3, int)
+#define CPT_SET_VEID	_IOW(CPTCTLTYPE, 4, int)
+#define CPT_SUSPEND	_IO(CPTCTLTYPE, 5)
+#define CPT_DUMP	_IO(CPTCTLTYPE, 6)
+#define CPT_UNDUMP	_IO(CPTCTLTYPE, 7)
+#define CPT_RESUME	_IO(CPTCTLTYPE, 8)
+#define CPT_KILL	_IO(CPTCTLTYPE, 9)
+#define CPT_JOIN_CONTEXT _IO(CPTCTLTYPE, 10)
+#define CPT_GET_CONTEXT _IOW(CPTCTLTYPE, 11, unsigned int)
+#define CPT_PUT_CONTEXT _IO(CPTCTLTYPE, 12)
+#define CPT_SET_PAGEINFDIN _IOW(CPTCTLTYPE, 13, int)
+#define CPT_SET_PAGEINFDOUT _IOW(CPTCTLTYPE, 14, int)
+#define CPT_PAGEIND	_IO(CPTCTLTYPE, 15)
+#define CPT_VMPREP	_IOW(CPTCTLTYPE, 16, int)
+#define CPT_SET_LAZY	_IOW(CPTCTLTYPE, 17, int)
+#define CPT_SET_CPU_FLAGS _IOW(CPTCTLTYPE, 18, unsigned int)
+#define CPT_TEST_CAPS	_IOW(CPTCTLTYPE, 19, unsigned int)
+#define CPT_TEST_VECAPS	_IOW(CPTCTLTYPE, 20, unsigned int)
+#define CPT_SET_ERRORFD _IOW(CPTCTLTYPE, 21, int)
+
+#define CPT_ITER	_IOW(CPTCTLTYPE, 23, int)	/* NOTE(review): request number 22 is not defined in this header */
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/dcache.h linux-2.6.16.46-0.12-027test011/include/linux/dcache.h
--- linux-2.6.16.46-0.12.orig/include/linux/dcache.h	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/dcache.h	2007-08-28 17:35:33.000000000 +0400
@@ -9,6 +9,8 @@
 #include <linux/cache.h>
 #include <linux/rcupdate.h>
 
+#include <ub/ub_dcache.h>
+
 struct nameidata;
 struct vfsmount;
 
@@ -111,6 +113,9 @@ struct dentry {
 	struct dcookie_struct *d_cookie; /* cookie, if any */
 #endif
 	int d_mounted;
+#ifdef CONFIG_USER_RESOURCE
+	struct dentry_beancounter dentry_bc;
+#endif
 	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
 };
 
@@ -164,6 +169,11 @@ d_iput:		no		no		no       yes
 
 #define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched */
 
+#define DCACHE_VIRTUAL		0x0100	/* ve accessible */
+
+extern void mark_tree_virtual(struct vfsmount *m, struct dentry *d);
+
+extern kmem_cache_t *dentry_cache;
 extern spinlock_t dcache_lock;
 
 /**
@@ -279,7 +289,12 @@ extern struct dentry * __d_lookup(struct
 /* validate "insecure" dentry pointer */
 extern int d_validate(struct dentry *, struct dentry *);
 
+extern int d_root_check(struct dentry *, struct vfsmount *);
 extern char * d_path(struct dentry *, struct vfsmount *, char *, int);
+extern char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
+			struct dentry *root, struct vfsmount *rootmnt,
+			char *buffer, int buflen);
+
   
 /* Allocation counts.. */
 
@@ -299,6 +314,12 @@ extern char * d_path(struct dentry *, st
 static inline struct dentry *dget(struct dentry *dentry)
 {
 	if (dentry) {
+#ifdef CONFIG_USER_RESOURCE
+		preempt_disable();
+		if (ub_dentry_on && ub_dget_testone(dentry))
+			BUG();
+		preempt_enable_no_resched();
+#endif
 		BUG_ON(!atomic_read(&dentry->d_count));
 		atomic_inc(&dentry->d_count);
 	}
@@ -342,6 +363,8 @@ extern struct dentry *lookup_create(stru
 
 extern int sysctl_vfs_cache_pressure;
 
+extern int check_area_access_ve(struct dentry *, struct vfsmount *);
+extern int check_area_execute_ve(struct dentry *, struct vfsmount *);
 #endif /* __KERNEL__ */
 
 #endif	/* __LINUX_DCACHE_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/device.h linux-2.6.16.46-0.12-027test011/include/linux/device.h
--- linux-2.6.16.46-0.12.orig/include/linux/device.h	2007-08-24 19:28:15.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/device.h	2007-08-28 17:35:31.000000000 +0400
@@ -287,6 +287,8 @@ extern struct class_device *class_device
 					__attribute__((format(printf,5,6)));
 extern void class_device_destroy(struct class *cls, dev_t devt);
 
+extern struct class net_class;
+
 
 /* interface for exporting device attributes */
 struct device_attribute {
diff -upr linux-2.6.16.46-0.12.orig/include/linux/devpts_fs.h linux-2.6.16.46-0.12-027test011/include/linux/devpts_fs.h
--- linux-2.6.16.46-0.12.orig/include/linux/devpts_fs.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/devpts_fs.h	2007-08-28 17:35:31.000000000 +0400
@@ -21,6 +21,16 @@ int devpts_pty_new(struct tty_struct *tt
 struct tty_struct *devpts_get_tty(int number);	 /* get tty structure */
 void devpts_pty_kill(int number);		 /* unlink */
 
+struct devpts_config {
+	int setuid;
+	int setgid;
+	uid_t   uid;
+	gid_t   gid;
+	umode_t mode;
+};
+
+extern struct devpts_config devpts_config;
+extern struct file_system_type devpts_fs_type;
 #else
 
 /* Dummy stubs in the no-pty case */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/elfcore.h linux-2.6.16.46-0.12-027test011/include/linux/elfcore.h
--- linux-2.6.16.46-0.12.orig/include/linux/elfcore.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/elfcore.h	2007-08-28 17:35:33.000000000 +0400
@@ -7,6 +7,8 @@
 #include <linux/user.h>
 #include <linux/ptrace.h>
 
+extern int sysctl_at_vsyscall;
+
 struct elf_siginfo
 {
 	int	si_signo;			/* signal number */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/eventpoll.h linux-2.6.16.46-0.12-027test011/include/linux/eventpoll.h
--- linux-2.6.16.46-0.12.orig/include/linux/eventpoll.h	2007-08-24 19:28:32.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/eventpoll.h	2007-08-28 17:35:33.000000000 +0400
@@ -85,6 +85,91 @@ static inline void eventpoll_release(str
 	eventpoll_release_file(file);
 }
 
+struct epoll_filefd {	/* (file, fd) pair; embedded in struct epitem as 'ffd' */
+	struct file *file;
+	int fd;
+};
+
+/*
+ * This structure is stored inside the "private_data" member of the file
+ * structure and represents the main data structure for the eventpoll
+ * interface.
+ */
+struct eventpoll {
+	/* Protects access to this structure */
+	rwlock_t lock;
+
+	/*
+	 * This semaphore is used to ensure that files are not removed
+	 * while epoll is using them. This is read-held during the event
+	 * collection loop and it is write-held during the file cleanup
+	 * path, the epoll file exit code and the ctl operations.
+	 */
+	struct rw_semaphore sem;
+
+	/* Wait queue used by sys_epoll_wait() */
+	wait_queue_head_t wq;
+
+	/* Wait queue used by file->poll() */
+	wait_queue_head_t poll_wait;
+
+	/* List of ready file descriptors */
+	struct list_head rdllist;
+
+	/* RB-Tree root used to store monitored fd structs */
+	struct rb_root rbr;
+};
+
+/*
+ * Each file descriptor added to the eventpoll interface will
+ * have an entry of this type linked to the eventpoll RB-tree.
+ */
+struct epitem {
+	/* RB-Tree node used to link this structure to the eventpoll rb-tree */
+	struct rb_node rbn;
+
+	/* List header used to link this structure to the eventpoll ready list */
+	struct list_head rdllink;
+
+	/* The file descriptor information this item refers to */
+	struct epoll_filefd ffd;
+
+	/* Number of active wait queues attached to poll operations */
+	int nwait;
+
+	/* List containing poll wait queues */
+	struct list_head pwqlist;
+
+	/* The "container" of this item */
+	struct eventpoll *ep;
+
+	/* The structure that describes the interested events and the source fd */
+	struct epoll_event event;
+
+	/*
+	 * Used to keep track of the usage count of the structure. This keeps
+	 * the structure from disappearing from underneath our processing.
+	 */
+	atomic_t usecnt;
+
+	/* List header used to link this item to the "struct file" items list */
+	struct list_head fllink;
+
+	/* List header used to link the item to the transfer list */
+	struct list_head txlink;
+
+	/*
+	 * This is used during the collection/transfer of events to userspace
+	 * to pin items empty events set.
+	 */
+	unsigned int revents;
+};
+
+extern struct semaphore epsem;
+struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
+int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+		     struct file *tfile, int fd);
+void ep_release_epitem(struct epitem *epi);
 
 /*
  * called by aio code to create fd that can poll the  aio event queueQ
diff -upr linux-2.6.16.46-0.12.orig/include/linux/fairsched.h linux-2.6.16.46-0.12-027test011/include/linux/fairsched.h
--- linux-2.6.16.46-0.12.orig/include/linux/fairsched.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/fairsched.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,139 @@
+#ifndef __LINUX_FAIRSCHED_H__
+#define __LINUX_FAIRSCHED_H__
+
+/*
+ * Fair Scheduler
+ *
+ * Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#define FAIRSCHED_SET_RATE	0
+#define FAIRSCHED_DROP_RATE	1
+#define FAIRSCHED_GET_RATE	2
+
+#ifdef __KERNEL__
+#include <linux/cache.h>
+#include <asm/timex.h>
+
+#define FAIRSCHED_HAS_CPU_BINDING	0
+
+typedef struct { cycles_t t; } fschtag_t;
+typedef struct { unsigned long d; } fschdur_t;
+typedef struct { cycles_t v; } fschvalue_t;
+
+struct vcpu_scheduler;
+
+struct fairsched_node {
+	struct list_head runlist;
+
+	/*
+	 * Fair Scheduler fields
+	 * NOTE(review): nr_running is not a member here; presumably nr_runnable.
+	 * nr_running >= nr_ready (!= if delayed)
+	 */
+	fschtag_t start_tag;
+	int nr_ready;
+	int nr_runnable;
+	int nr_pcpu;
+	int vcpus;
+
+	/*
+	 * Rate limiter fields
+	 */
+	cycles_t last_updated_at;
+	fschvalue_t value;	/* leaky function value */
+	cycles_t delay;		/* removed from schedule till */
+	unsigned char delayed;
+
+	/*
+	 * Configuration
+	 *
+	 * Read-only most of the time.
+	 */
+	unsigned weight ____cacheline_aligned_in_smp;
+				/* fairness weight */
+	unsigned char rate_limited;
+	unsigned rate;		/* max CPU share */
+	fschtag_t max_latency;
+	unsigned min_weight;
+
+	struct list_head nodelist;
+	int id;
+#ifdef CONFIG_VE
+	struct ve_struct *owner_env;
+#endif
+	struct vcpu_scheduler *vsched;
+};
+
+#define for_each_fairsched_node(n)	\
+	list_for_each_entry((n), &fairsched_node_head, nodelist)
+
+#ifdef CONFIG_FAIRSCHED
+
+#define FSCHWEIGHT_MAX			((1 << 16) - 1)
+#define FSCHRATE_SHIFT			10
+/*
+ * Fairsched timeslice value (in msecs) specifies the maximum time a
+ * node can run continuously without rescheduling; in other words, the
+ * main linux scheduler must call fairsched_schedule() within
+ * FSCH_TIMESLICE msecs or the fairscheduler logic will be broken.
+ *
+ * NOTE: must correspond to the VCPU_TIMESLICE_MAX value
+ */
+#define FSCH_TIMESLICE			8
+
+/*
+ * Fairsched nodes used in boot process.
+ */
+extern struct fairsched_node fairsched_init_node;
+extern struct fairsched_node fairsched_idle_node;
+
+/*
+ * For proc output.
+ */
+extern unsigned fairsched_nr_cpus;
+extern void fairsched_cpu_online_map(int id, cpumask_t *mask);
+
+/* I hope vsched_id is always equal to fairsched node id  --SAW */
+#define task_fairsched_node_id(p)	task_vsched_id(p)
+
+/*
+ * Core functions.
+ */
+extern void fairsched_incrun(struct fairsched_node *node);
+extern void fairsched_decrun(struct fairsched_node *node);
+extern void fairsched_inccpu(struct fairsched_node *node);
+extern void fairsched_deccpu(struct fairsched_node *node);
+extern struct fairsched_node *fairsched_schedule(
+		struct fairsched_node *prev_node,
+		struct fairsched_node *cur_node,
+		int cur_node_active,
+		cycles_t time);
+
+/*
+ * Management functions.
+ */
+void fairsched_init_early(void);
+asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
+		unsigned int newid);
+asmlinkage int sys_fairsched_rmnod(unsigned int id);
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid);
+asmlinkage int sys_fairsched_vcpus(unsigned int id, unsigned int vcpus);
+
+#else /* CONFIG_FAIRSCHED */
+
+#define task_fairsched_node_id(p)	0
+#define fairsched_incrun(p)		do { } while (0)
+#define fairsched_decrun(p)		do { } while (0)
+#define fairsched_inccpu(p)		do { } while (0)
+#define fairsched_deccpu(p)		do { } while (0)
+#define fairsched_cpu_online_map(id, mask)      do { *(mask) = cpu_online_map; } while (0)
+
+#endif /* CONFIG_FAIRSCHED */
+#endif /* __KERNEL__ */
+
+#endif /* __LINUX_FAIRSCHED_H__ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/faudit.h linux-2.6.16.46-0.12-027test011/include/linux/faudit.h
--- linux-2.6.16.46-0.12.orig/include/linux/faudit.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/faudit.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,46 @@
+/*
+ *  include/linux/faudit.h
+ *
+ *  Copyright (C) 2005  SWSoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __FAUDIT_H_
+#define __FAUDIT_H_
+
+#include <linux/config.h>
+#include <linux/virtinfo.h>
+
+struct vfsmount;
+struct dentry;
+struct super_block;
+struct kstatfs;
+struct kstat;
+struct pt_regs;
+
+struct faudit_regs_arg {
+	int err;
+	struct pt_regs *regs;
+};
+
+struct faudit_stat_arg {
+	int err;
+	struct vfsmount *mnt;
+	struct dentry *dentry;
+	struct kstat *stat;
+};
+
+struct faudit_statfs_arg {
+	int err;
+	struct super_block *sb;
+	struct kstatfs *stat;
+};
+
+#define VIRTINFO_FAUDIT			(0)
+#define VIRTINFO_FAUDIT_STAT		(VIRTINFO_FAUDIT + 0)
+#define VIRTINFO_FAUDIT_STATFS		(VIRTINFO_FAUDIT + 1)
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/fs.h linux-2.6.16.46-0.12-027test011/include/linux/fs.h
--- linux-2.6.16.46-0.12.orig/include/linux/fs.h	2007-08-24 19:28:30.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/fs.h	2007-08-28 17:35:36.000000000 +0400
@@ -66,6 +66,7 @@ extern int dir_notify_enable;
 #define FMODE_LSEEK	4
 #define FMODE_PREAD	8
 #define FMODE_PWRITE	FMODE_PREAD	/* These go hand in hand */
+#define FMODE_QUOTACTL	4	/* NOTE(review): same value as FMODE_LSEEK above - confirm the overlap is intended */
 
 #define RW_MASK		1
 #define RWA_MASK	2
@@ -85,6 +86,7 @@ extern int dir_notify_enable;
 /* public flags for file_system_type */
 #define FS_REQUIRES_DEV 1 
 #define FS_BINARY_MOUNTDATA 2
+#define FS_VIRTUALIZED	64	/* Can mount this fstype inside ve */
 #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
 #define FS_ODD_RENAME	32768	/* Temporary stuff; will go away as soon
 				  * as nfs_rename() will be cleaned up
@@ -305,6 +307,9 @@ struct iattr {
  * Includes for diskquotas.
  */
 #include <linux/quota.h>
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+#include <linux/vzquota_qlnk.h>
+#endif
 
 /** 
  * enum positive_aop_returns - aop return codes with specific semantics
@@ -374,6 +379,7 @@ struct address_space_operations {
 			int);
 	/* migrate the contents of a page to the specified target */
 	int (*migratepage) (struct page *, struct page *);
+	int (*launder_page) (struct page *);
 };
 
 struct backing_dev_info;
@@ -446,6 +452,7 @@ struct block_device {
 #define PAGECACHE_TAG_WRITEBACK	1
 
 int mapping_tagged(struct address_space *mapping, int tag);
+int dupfd(struct file *file, unsigned int start);
 
 /*
  * Might pages of this file be mapped into userspace?
@@ -511,6 +518,9 @@ struct inode {
 #ifdef CONFIG_QUOTA
 	struct dquot		*i_dquot[MAXQUOTAS];
 #endif
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+	struct vz_quota_ilink	i_qlnk;
+#endif
 	/* These three should probably be a union */
 	struct list_head	i_devices;
 	struct pipe_inode_info	*i_pipe;
@@ -545,6 +555,8 @@ struct inode {
 #endif
 };
 
+extern kmem_cache_t *inode_cachep;
+
 /*
  * NOTE: in a 32bit arch with a preemptable kernel and
  * an UP compile the i_size_read/write must be atomic
@@ -651,6 +663,7 @@ struct file {
 	struct fown_struct	f_owner;
 	unsigned int		f_uid, f_gid;
 	struct file_ra_state	f_ra;
+	struct user_beancounter	*f_ub;
 
 	unsigned long		f_version;
 	void			*f_security;
@@ -664,7 +677,9 @@ struct file {
 	spinlock_t		f_ep_lock;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
+	struct ve_struct	*owner_env;
 };
+
 extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
 #define file_list_unlock() spin_unlock(&files_lock);
@@ -728,6 +743,9 @@ struct file_lock {
 	struct file *fl_file;
 	unsigned char fl_flags;
 	unsigned char fl_type;
+#ifdef CONFIG_USER_RESOURCE
+	unsigned char fl_charged;
+#endif
 	loff_t fl_start;
 	loff_t fl_end;
 
@@ -854,9 +872,6 @@ struct super_block {
 	struct list_head	s_instances;
 	struct quota_info	s_dquot;	/* Diskquota specific options */
 
-	unsigned int		s_prunes;	/* protected by dcache_lock */
-	wait_queue_head_t	s_wait_prunes;
-
 	int			s_frozen;
 	wait_queue_head_t	s_wait_unfrozen;
 
@@ -1118,6 +1133,8 @@ struct super_operations {
 
 	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+
+	struct inode *(*get_quota_root)(struct super_block *);
 };
 
 /* Inode state bits.  Protected by inode_lock. */
@@ -1282,8 +1299,12 @@ struct file_system_type {
 	struct module *owner;
 	struct file_system_type * next;
 	struct list_head fs_supers;
+	struct ve_struct *owner_env;
 };
 
+void get_filesystem(struct file_system_type *fs);
+void put_filesystem(struct file_system_type *fs);
+
 struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
 	int (*fill_super)(struct super_block *, void *, int));
@@ -1317,10 +1338,15 @@ void unnamed_dev_init(void);
 
 extern int register_filesystem(struct file_system_type *);
 extern int unregister_filesystem(struct file_system_type *);
+extern int register_ve_fs_type(struct ve_struct *, struct file_system_type *,
+		struct file_system_type **, struct vfsmount **);
+extern void unregister_ve_fs_type(struct file_system_type *, struct vfsmount *);
+extern void umount_ve_fs_type(struct file_system_type *local_fs_type);
 extern struct vfsmount *kern_mount(struct file_system_type *);
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
 extern void umount_tree(struct vfsmount *, int, struct list_head *);
+#define kern_umount mntput
 extern void release_mounts(struct list_head *);
 extern long do_mount(char *, char *, char *, unsigned long, void *);
 extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
@@ -1328,6 +1354,7 @@ extern void mnt_set_mountpoint(struct vf
 				  struct vfsmount *);
 
 extern int vfs_statfs(struct super_block *, struct kstatfs *);
+extern int faudit_statfs(struct super_block *, struct kstatfs *);
 
 /* /sys/fs */
 extern struct subsystem fs_subsys;
@@ -1434,7 +1461,7 @@ extern void chrdev_show(struct seq_file 
 #define BDEVNAME_SIZE	32	/* Largest string for a blockdev identifier */
 extern const char *__bdevname(dev_t, char *buffer);
 extern const char *bdevname(struct block_device *bdev, char *buffer);
-extern struct block_device *lookup_bdev(const char *);
+extern struct block_device *lookup_bdev(const char *, int mode);
 extern struct block_device *open_bdev_excl(const char *, int, void *);
 extern void close_bdev_excl(struct block_device *);
 extern void blkdev_show(struct seq_file *,off_t);
@@ -1462,7 +1489,7 @@ extern int fs_may_remount_ro(struct supe
 #define bio_data_dir(bio)	((bio)->bi_rw & 1)
 
 extern int check_disk_change(struct block_device *);
-extern int invalidate_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *, int);
 extern int __invalidate_device(struct block_device *);
 extern int invalidate_partition(struct gendisk *, int);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
diff -upr linux-2.6.16.46-0.12.orig/include/linux/genhd.h linux-2.6.16.46-0.12-027test011/include/linux/genhd.h
--- linux-2.6.16.46-0.12.orig/include/linux/genhd.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/genhd.h	2007-08-28 17:35:31.000000000 +0400
@@ -424,6 +424,7 @@ static inline struct block_device *bdget
 	return bdget(MKDEV(disk->major, disk->first_minor) + index);
 }
 
+extern struct subsystem block_subsys;
 #endif
 
 #endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/gfp.h linux-2.6.16.46-0.12-027test011/include/linux/gfp.h
--- linux-2.6.16.46-0.12.orig/include/linux/gfp.h	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/gfp.h	2007-08-28 17:35:30.000000000 +0400
@@ -47,6 +47,8 @@ struct vm_area_struct;
 #define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_UBC	((__force gfp_t)0x40000u)/* charge kmem in buddy and slab */
+#define __GFP_SOFT_UBC	((__force gfp_t)0x80000u)/* use soft charging */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -55,14 +57,17 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-			__GFP_NOMEMALLOC|__GFP_HARDWALL)
+			__GFP_NOMEMALLOC|__GFP_HARDWALL| \
+			__GFP_UBC|__GFP_SOFT_UBC)
 
 /* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)
 #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
 #define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
+#define GFP_KERNEL_UBC	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC)
 #define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+#define GFP_USER_UBC	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | __GFP_UBC)
 #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
 			 __GFP_HIGHMEM)
 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/hardirq.h linux-2.6.16.46-0.12-027test011/include/linux/hardirq.h
--- linux-2.6.16.46-0.12.orig/include/linux/hardirq.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/hardirq.h	2007-08-28 17:35:31.000000000 +0400
@@ -7,6 +7,9 @@
 #include <asm/hardirq.h>
 #include <asm/system.h>
 
+#include <ub/ub_task.h>
+#include <linux/ve_task.h>
+
 /*
  * We put the hardirq and softirq counter into the preemption
  * counter. The bitmask has the following meaning:
@@ -59,6 +62,24 @@
 #define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
 #define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK))
 
+#define save_context()		do {				\
+		struct task_struct *tsk;			\
+		if (hardirq_count() == HARDIRQ_OFFSET) { /* exactly one hardirq level */ \
+			tsk = current;				\
+			ve_save_context(tsk);			\
+			ub_save_context(tsk);			\
+		}						\
+	} while (0)
+
+#define restore_context()		do {			\
+		struct task_struct *tsk;			\
+		if (hardirq_count() == HARDIRQ_OFFSET) { /* exactly one hardirq level */ \
+			tsk = current;				\
+			ve_restore_context(tsk);		\
+			ub_restore_context(tsk);		\
+		}						\
+	} while (0)
+
 /*
  * Are we doing bottom half or hardware interrupt processing?
  * Are we in a softirq context? Interrupt context?
@@ -88,7 +109,10 @@ extern void synchronize_irq(unsigned int
 #endif
 
 #define nmi_enter()		irq_enter()
-#define nmi_exit()		sub_preempt_count(HARDIRQ_OFFSET)
+#define nmi_exit()		do {			\
+		restore_context();			\
+		sub_preempt_count(HARDIRQ_OFFSET);	\
+	} while (0)
 
 struct task_struct;
 
@@ -102,6 +126,7 @@ static inline void account_system_vtime(
 	do {						\
 		account_system_vtime(current);		\
 		add_preempt_count(HARDIRQ_OFFSET);	\
+		save_context();				\
 	} while (0)
 
 extern void irq_exit(void);
diff -upr linux-2.6.16.46-0.12.orig/include/linux/hrtimer.h linux-2.6.16.46-0.12-027test011/include/linux/hrtimer.h
--- linux-2.6.16.46-0.12.orig/include/linux/hrtimer.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/hrtimer.h	2007-08-28 17:35:33.000000000 +0400
@@ -140,4 +140,9 @@ extern void hrtimer_run_queues(void);
 /* Bootup initialization: */
 extern void __init hrtimers_init(void);
 
+extern long nanosleep_restart(struct restart_block *restart);
+
+extern ktime_t schedule_hrtimer(struct hrtimer *timer,
+				const enum hrtimer_mode mode);
+
 #endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/if_bridge.h linux-2.6.16.46-0.12-027test011/include/linux/if_bridge.h
--- linux-2.6.16.46-0.12.orig/include/linux/if_bridge.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/if_bridge.h	2007-08-28 17:35:32.000000000 +0400
@@ -44,6 +44,7 @@
 #define BRCTL_SET_PORT_PRIORITY 16
 #define BRCTL_SET_PATH_COST 17
 #define BRCTL_GET_FDB_ENTRIES 18
+#define BRCTL_SET_VIA_ORIG_DEV 19
 
 #define BR_STATE_DISABLED 0
 #define BR_STATE_LISTENING 1
@@ -72,6 +73,7 @@ struct __bridge_info
 	__u32 tcn_timer_value;
 	__u32 topology_change_timer_value;
 	__u32 gc_timer_value;
+	__u8 via_phys_dev;
 };
 
 struct __port_info
@@ -104,8 +106,11 @@ struct __fdb_entry
 
 #include <linux/netdevice.h>
 
+#define BR_ALREADY_SEEN 1
+
 extern void brioctl_set(int (*ioctl_hook)(unsigned int, void __user *));
 extern int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+extern int (*br_hard_xmit_hook)(struct sk_buff *skb, struct net_bridge_port *port);
 extern int (*br_should_route_hook)(struct sk_buff **pskb);
 
 #endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/if_vlan.h linux-2.6.16.46-0.12-027test011/include/linux/if_vlan.h
--- linux-2.6.16.46-0.12.orig/include/linux/if_vlan.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/if_vlan.h	2007-08-28 17:35:32.000000000 +0400
@@ -77,6 +77,9 @@ struct vlan_group {
 	struct hlist_node	hlist;	/* linked list */
 	struct net_device *vlan_devices[VLAN_GROUP_ARRAY_LEN];
 	struct rcu_head		rcu;
+#ifdef CONFIG_VE
+	struct ve_struct	*owner;
+#endif
 };
 
 struct vlan_priority_tci_mapping {
diff -upr linux-2.6.16.46-0.12.orig/include/linux/inetdevice.h linux-2.6.16.46-0.12-027test011/include/linux/inetdevice.h
--- linux-2.6.16.46-0.12.orig/include/linux/inetdevice.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/inetdevice.h	2007-08-28 17:35:32.000000000 +0400
@@ -34,6 +34,12 @@ struct ipv4_devconf
 };
 
 extern struct ipv4_devconf ipv4_devconf;
+extern struct ipv4_devconf ipv4_devconf_dflt;
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+#define ve_ipv4_devconf		(*(get_exec_env()->_ipv4_devconf))
+#else
+#define ve_ipv4_devconf		ipv4_devconf
+#endif
 
 struct in_device
 {
@@ -60,29 +66,29 @@ struct in_device
 };
 
 #define IN_DEV_FORWARD(in_dev)		((in_dev)->cnf.forwarding)
-#define IN_DEV_MFORWARD(in_dev)		(ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
-#define IN_DEV_RPFILTER(in_dev)		(ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
-#define IN_DEV_SOURCE_ROUTE(in_dev)	(ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
-#define IN_DEV_BOOTP_RELAY(in_dev)	(ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
-
-#define IN_DEV_LOG_MARTIANS(in_dev)	(ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
-#define IN_DEV_PROXY_ARP(in_dev)	(ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
-#define IN_DEV_SHARED_MEDIA(in_dev)	(ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
-#define IN_DEV_TX_REDIRECTS(in_dev)	(ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
-#define IN_DEV_SEC_REDIRECTS(in_dev)	(ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
+#define IN_DEV_MFORWARD(in_dev)		(ve_ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
+#define IN_DEV_RPFILTER(in_dev)		(ve_ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
+#define IN_DEV_SOURCE_ROUTE(in_dev)	(ve_ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
+#define IN_DEV_BOOTP_RELAY(in_dev)	(ve_ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
+
+#define IN_DEV_LOG_MARTIANS(in_dev)	(ve_ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
+#define IN_DEV_PROXY_ARP(in_dev)	(ve_ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
+#define IN_DEV_SHARED_MEDIA(in_dev)	(ve_ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
+#define IN_DEV_TX_REDIRECTS(in_dev)	(ve_ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
+#define IN_DEV_SEC_REDIRECTS(in_dev)	(ve_ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
 #define IN_DEV_IDTAG(in_dev)		((in_dev)->cnf.tag)
 #define IN_DEV_MEDIUM_ID(in_dev)	((in_dev)->cnf.medium_id)
 #define IN_DEV_PROMOTE_SECONDARIES(in_dev)	(ipv4_devconf.promote_secondaries || (in_dev)->cnf.promote_secondaries)
 
 #define IN_DEV_RX_REDIRECTS(in_dev) \
 	((IN_DEV_FORWARD(in_dev) && \
-	  (ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
+	  (ve_ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
 	 || (!IN_DEV_FORWARD(in_dev) && \
-	  (ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
+ 	  (ve_ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
 
-#define IN_DEV_ARPFILTER(in_dev)	(ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
-#define IN_DEV_ARP_ANNOUNCE(in_dev)	(max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
-#define IN_DEV_ARP_IGNORE(in_dev)	(max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
+#define IN_DEV_ARPFILTER(in_dev)	(ve_ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
+#define IN_DEV_ARP_ANNOUNCE(in_dev)	(max(ve_ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
+#define IN_DEV_ARP_IGNORE(in_dev)	(max(ve_ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
 
 struct in_ifaddr
 {
@@ -113,6 +119,7 @@ extern u32		inet_select_addr(const struc
 extern u32		inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope);
 extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask);
 extern void		inet_forward_change(void);
+extern void		inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy);
 
 static __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa)
 {
@@ -180,6 +187,16 @@ static inline void in_dev_put(struct in_
 #define __in_dev_put(idev)  atomic_dec(&(idev)->refcnt)
 #define in_dev_hold(idev)   atomic_inc(&(idev)->refcnt)
 
+struct ve_struct;
+#ifdef CONFIG_INET
+extern int devinet_sysctl_init(struct ve_struct *);
+extern void devinet_sysctl_fini(struct ve_struct *);
+extern void devinet_sysctl_free(struct ve_struct *);
+#else
+static inline int devinet_sysctl_init(struct ve_struct *ve) { return 0; }
+static inline void devinet_sysctl_fini(struct ve_struct *ve) { ; }
+static inline void devinet_sysctl_free(struct ve_struct *ve) { ; }
+#endif
 #endif /* __KERNEL__ */
 
 static __inline__ __u32 inet_make_mask(int logmask)
diff -upr linux-2.6.16.46-0.12.orig/include/linux/inotify.h linux-2.6.16.46-0.12-027test011/include/linux/inotify.h
--- linux-2.6.16.46-0.12.orig/include/linux/inotify.h	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/inotify.h	2007-08-28 17:35:33.000000000 +0400
@@ -68,6 +68,7 @@ struct inotify_event {
 #include <linux/dcache.h>
 #include <linux/fs.h>
 #include <linux/config.h>
+#include <linux/idr.h>
 
 /*
  * struct inotify_watch - represents a watch request on a specific inode
@@ -85,6 +86,8 @@ struct inotify_watch {
 	struct list_head	i_list; /* entry in inode's list */
 	atomic_t		count;  /* reference count */
 	struct inotify_handle	*ih;    /* associated inotify handle */
+	struct dentry		*dentry;
+	struct vfsmount		*mnt;
 	struct inode		*inode; /* associated inode */
 	__s32			wd;     /* watch descriptor */
 	__u32	           	mask;   /* event mask for this watch */
@@ -118,7 +121,10 @@ extern __s32 inotify_find_watch(struct i
 extern __s32 inotify_find_update_watch(struct inotify_handle *, struct inode *,
 					u32);
 extern __s32 inotify_add_watch(struct inotify_handle *, struct inotify_watch *,
-				struct inode *, __u32);
+			       struct inode *, __u32);
+extern __s32 inotify_add_watch_dget(struct inotify_handle *ih,
+				    struct inotify_watch *watch, struct dentry *d,
+				    struct vfsmount *mnt, u32 mask);
 extern int inotify_rm_watch(struct inotify_handle *, struct inotify_watch *);
 extern int inotify_rm_wd(struct inotify_handle *, __u32);
 extern void inotify_remove_watch_locked(struct inotify_handle *,
@@ -126,6 +132,66 @@ extern void inotify_remove_watch_locked(
 extern void get_inotify_watch(struct inotify_watch *);
 extern void put_inotify_watch(struct inotify_watch *);
 
+/*
+ * struct inotify_handle - represents an inotify instance
+ *
+ * This structure is protected by the mutex 'mutex'.
+ */
+struct inotify_handle {
+	struct idr		idr;		/* idr mapping wd -> watch */
+	struct mutex		mutex;		/* protects this bad boy */
+	struct list_head	watches;	/* list of watches */
+	atomic_t		count;		/* reference count */
+	u32			last_wd;	/* the last wd allocated */
+	const struct inotify_operations *in_ops; /* inotify caller operations */
+};
+
+
+/*
+ * struct inotify_device - represents an inotify instance
+ *
+ * The event queue is protected by 'ev_mutex'; watch updates by 'up_mutex'.
+ */
+struct inotify_device {
+	wait_queue_head_t 	wq;		/* wait queue for i/o */
+	struct mutex		ev_mutex;	/* protects event queue */
+	struct mutex		up_mutex;	/* synchronizes watch updates */
+	struct list_head 	events;		/* list of queued events */
+	atomic_t		count;		/* reference count */
+	struct user_struct	*user;		/* user who opened this dev */
+	struct inotify_handle	*ih;		/* inotify handle */
+	unsigned int		queue_size;	/* size of the queue (bytes) */
+	unsigned int		event_count;	/* number of pending events */
+	unsigned int		max_events;	/* maximum number of events */
+};
+
+/*
+ * struct inotify_kernel_event - An inotify event, originating from a watch and
+ * queued for user-space.  A list of these is attached to each instance of the
+ * device.  In read(), this list is walked and all events that can fit in the
+ * buffer are returned.
+ *
+ * Protected by dev->ev_mutex of the device in which we are queued.
+ */
+struct inotify_kernel_event {
+	struct inotify_event	event;	/* the user-space event */
+	struct list_head        list;	/* entry in inotify_device's list */
+	char			*name;	/* filename, if any */
+};
+
+/*
+ * struct inotify_user_watch - our version of an inotify_watch, we add
+ * a reference to the associated inotify_device.
+ */
+struct inotify_user_watch {
+	struct inotify_device	*dev;	/* associated device */
+	struct inotify_watch	wdata;	/* inotify watch data */
+};
+
+int inotify_create_watch(struct inotify_device *dev, struct dentry *d,
+			 struct vfsmount *mnt, u32 mask);
+
+
 #else
 
 static inline void inotify_d_instantiate(struct dentry *dentry,
diff -upr linux-2.6.16.46-0.12.orig/include/linux/ipc.h linux-2.6.16.46-0.12-027test011/include/linux/ipc.h
--- linux-2.6.16.46-0.12.orig/include/linux/ipc.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/ipc.h	2007-08-28 17:35:36.000000000 +0400
@@ -68,6 +68,15 @@ struct kern_ipc_perm
 	void		*security;
 };
 
+struct ve_struct;
+
+void prepare_ipc(void);
+int init_ve_ipc(struct ve_struct *ve);
+void fini_ve_ipc(struct ve_struct *ve);
+
+void ve_ipc_cleanup(void);
+void ve_ipc_free(struct ve_struct *env);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_IPC_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/ipv6.h linux-2.6.16.46-0.12-027test011/include/linux/ipv6.h
--- linux-2.6.16.46-0.12.orig/include/linux/ipv6.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/ipv6.h	2007-08-28 17:35:32.000000000 +0400
@@ -415,12 +415,13 @@ static inline struct raw6_sock *raw6_sk(
 #define inet_v6_ipv6only(__sk)		0
 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 
-#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\
+#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif,__ve)\
 	(((__sk)->sk_hash == (__hash))				&& \
 	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))  	&& \
 	 ((__sk)->sk_family		== AF_INET6)		&& \
 	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&& \
 	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&& \
+	 ve_accessible_strict((__sk)->owner_env, (__ve))	&& \
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 
 #endif /* __KERNEL__ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/jbd.h linux-2.6.16.46-0.12-027test011/include/linux/jbd.h
--- linux-2.6.16.46-0.12.orig/include/linux/jbd.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/jbd.h	2007-08-28 17:35:29.000000000 +0400
@@ -245,10 +245,15 @@ typedef struct journal_superblock_s
 #define J_ASSERT(assert)						\
 do {									\
 	if (!(assert)) {						\
+		unsigned long stack;					\
 		printk (KERN_EMERG					\
 			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
 			__FUNCTION__, __FILE__, __LINE__, # assert);	\
-		BUG();							\
+		printk("Stack=%p current=%p pid=%d ve=%d comm='%s'\n",	\
+				&stack, current, current->pid,		\
+				get_exec_env()->veid,			\
+				current->comm);				\
+		dump_stack();						\
 	}								\
 } while (0)
 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/jiffies.h linux-2.6.16.46-0.12-027test011/include/linux/jiffies.h
--- linux-2.6.16.46-0.12.orig/include/linux/jiffies.h	2007-08-24 19:28:22.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/jiffies.h	2007-08-28 17:35:36.000000000 +0400
@@ -80,6 +80,7 @@
  */
 extern u64 __jiffy_data jiffies_64;
 extern unsigned long volatile __jiffy_data jiffies;
+extern unsigned long cycles_per_jiffy, cycles_per_clock;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
@@ -394,11 +395,13 @@ static inline clock_t jiffies_to_clock_t
 static inline unsigned long clock_t_to_jiffies(unsigned long x)
 {
 #if (HZ % USER_HZ)==0
+	WARN_ON((long)x < 0);
 	if (x >= ~0UL / (HZ / USER_HZ))
 		return ~0UL;
 	return x * (HZ / USER_HZ);
 #else
 	u64 jif;
+	WARN_ON((long)x < 0);
 
 	/* Don't worry about loss of precision here .. */
 	if (x >= ~0UL / HZ * USER_HZ)
@@ -413,6 +416,7 @@ static inline unsigned long clock_t_to_j
 
 static inline u64 jiffies_64_to_clock_t(u64 x)
 {
+	WARN_ON((s64)x < 0);
 #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
 	do_div(x, HZ / USER_HZ);
 #else
@@ -429,6 +433,7 @@ static inline u64 jiffies_64_to_clock_t(
 
 static inline u64 nsec_to_clock_t(u64 x)
 {
+	WARN_ON((s64)x < 0);
 #if (NSEC_PER_SEC % USER_HZ) == 0
 	do_div(x, (NSEC_PER_SEC / USER_HZ));
 #elif (USER_HZ % 512) == 0
diff -upr linux-2.6.16.46-0.12.orig/include/linux/kdbprivate.h linux-2.6.16.46-0.12-027test011/include/linux/kdbprivate.h
--- linux-2.6.16.46-0.12.orig/include/linux/kdbprivate.h	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/kdbprivate.h	2007-08-28 17:35:31.000000000 +0400
@@ -474,8 +474,8 @@ extern initcall_t __kdb_initcall_start, 
 extern void kdb_runqueue(unsigned long cpu, kdb_printf_t xxx_printf);
 
 /* Simplify coexistence with NPTL */
-#define	kdb_do_each_thread(g, p) do_each_thread(g, p)
-#define	kdb_while_each_thread(g, p) while_each_thread(g, p)
+#define	kdb_do_each_thread(g, p) do_each_thread_all(g, p)
+#define	kdb_while_each_thread(g, p) while_each_thread_all(g, p)
 
 #define GFP_KDB (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)
 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/kdev_t.h linux-2.6.16.46-0.12-027test011/include/linux/kdev_t.h
--- linux-2.6.16.46-0.12.orig/include/linux/kdev_t.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/kdev_t.h	2007-08-28 17:35:31.000000000 +0400
@@ -87,6 +87,57 @@ static inline unsigned sysv_minor(u32 de
 	return dev & 0x3ffff;
 }
 
+#define UNNAMED_MAJOR_COUNT	16
+
+#if UNNAMED_MAJOR_COUNT > 1
+
+extern int unnamed_dev_majors[UNNAMED_MAJOR_COUNT];
+
+static inline dev_t make_unnamed_dev(int idx)
+{
+	/*
+	 * Bits 8 .. 8+log2(UNNAMED_MAJOR_COUNT) of the index pick the major
+	 * out of unnamed_dev_majors[]; the remaining bits become the minor.
+	 */
+	const int sel = UNNAMED_MAJOR_COUNT - 1;
+	return MKDEV(unnamed_dev_majors[(idx >> 8) & sel], idx & ~(sel << 8));
+}
+
+static inline int unnamed_dev_idx(dev_t dev)
+{
+	int n = 0;	/* slot of MAJOR(dev) in unnamed_dev_majors[] */
+	while (n < UNNAMED_MAJOR_COUNT && MAJOR(dev) != unnamed_dev_majors[n])
+		n++;
+	return MINOR(dev) | (n << 8);
+}
+
+static inline int is_unnamed_dev(dev_t dev)
+{
+	int n = 0;	/* stops early when MAJOR(dev) is found in the table */
+	while (n < UNNAMED_MAJOR_COUNT && MAJOR(dev) != unnamed_dev_majors[n])
+		n++;
+	return n < UNNAMED_MAJOR_COUNT;
+}
+
+#else /* UNNAMED_MAJOR_COUNT */
+
+static inline dev_t make_unnamed_dev(int idx)
+{
+	return MKDEV(0, idx);	/* single major 0; idx is used as the minor */
+}
+
+static inline int unnamed_dev_idx(dev_t dev)
+{
+	return MINOR(dev);	/* with one major the index is just the minor */
+}
+
+static inline int is_unnamed_dev(dev_t dev)
+{
+	return MAJOR(dev) == 0;	/* only major 0 is unnamed in this config */
+}
+
+#endif /* UNNAMED_MAJOR_COUNT */
+
 
 #else /* __KERNEL__ */
 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/kernel.h linux-2.6.16.46-0.12-027test011/include/linux/kernel.h
--- linux-2.6.16.46-0.12.orig/include/linux/kernel.h	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/kernel.h	2007-08-28 17:35:36.000000000 +0400
@@ -134,6 +134,9 @@ asmlinkage int vprintk(const char *fmt, 
 	__attribute__ ((format (printf, 1, 0)));
 asmlinkage int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
+asmlinkage int ve_printk(int, const char * fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+void prepare_printk(void);
 #else
 static inline int vprintk(const char *s, va_list args)
 	__attribute__ ((format (printf, 1, 0)));
@@ -141,8 +144,16 @@ static inline int vprintk(const char *s,
 static inline int printk(const char *s, ...)
 	__attribute__ ((format (printf, 1, 2)));
 static inline int printk(const char *s, ...) { return 0; }
+static inline int ve_printk(int d, const char *s, ...)
+	__attribute__ ((format (printf, 2, 3)));
+static inline int ve_printk(int d, const char *s, ...) { return 0; }
+#define prepare_printk()	do { } while (0)
 #endif
 
+#define VE0_LOG		1
+#define VE_LOG		2
+#define VE_LOG_BOTH	(VE0_LOG | VE_LOG)
+
 unsigned long int_sqrt(unsigned long);
 
 static inline int __attribute_pure__ long_log2(unsigned long x)
@@ -173,10 +184,14 @@ static inline void console_verbose(void)
 }
 
 extern void bust_spinlocks(int yes);
+extern void wake_up_klogd(void);
 extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in progress */
 extern __deprecated_for_modules int panic_timeout;
 extern int panic_on_oops;
+extern int decode_call_traces;
 extern int tainted;
+extern int kernel_text_csum_broken;
+extern void check_kernel_csum_bug(void);
 extern int unsupported;
 extern const char *print_tainted(void);
 extern void add_taint(unsigned);
diff -upr linux-2.6.16.46-0.12.orig/include/linux/kmem_cache.h linux-2.6.16.46-0.12-027test011/include/linux/kmem_cache.h
--- linux-2.6.16.46-0.12.orig/include/linux/kmem_cache.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/kmem_cache.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,200 @@
+#ifndef __KMEM_CACHE_H__
+#define __KMEM_CACHE_H__
+#include <linux/threads.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <asm/atomic.h>
+
+/*
+ * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
+ *		  SLAB_RED_ZONE & SLAB_POISON.
+ *		  0 for faster, smaller code (especially in the critical paths).
+ *
+ * STATS	- 1 to collect stats for /proc/slabinfo.
+ *		  0 for faster, smaller code (especially in the critical paths).
+ *
+ * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
+ */
+
+#ifdef CONFIG_DEBUG_SLAB
+#define	SLAB_DEBUG		1
+#define	SLAB_STATS		1
+#define	SLAB_FORCED_DEBUG	1
+#else
+#define	SLAB_DEBUG		0
+#define	SLAB_STATS		0
+#define	SLAB_FORCED_DEBUG	0
+#endif
+
+/*
+ * struct array_cache
+ *
+ * Purpose:
+ * - LIFO ordering, to hand out cache-warm objects from _alloc
+ * - reduce the number of linked list operations
+ * - reduce spinlock operations
+ *
+ * The limit is stored in the per-cpu structure to reduce the data cache
+ * footprint.
+ *
+ */
+struct array_cache {
+	unsigned int avail;
+	unsigned int limit;
+	unsigned int batchcount;
+	unsigned int touched;
+	spinlock_t lock;
+	void *entry[0];		/*
+				 * Must have this definition in here for the proper
+				 * alignment of array_cache. Also simplifies accessing
+				 * the entries.
+				 * [0] is for gcc 2.95. It should really be [].
+				 */
+};
+
+/* bootstrap: The caches do not work without cpuarrays anymore,
+ * but the cpuarrays are allocated from the generic caches...
+ */
+#define BOOT_CPUCACHE_ENTRIES	1
+struct arraycache_init {
+	struct array_cache cache;
+	void *entries[BOOT_CPUCACHE_ENTRIES];
+};
+
+/*
+ * The slab lists for all objects.
+ */
+struct kmem_list3 {
+	struct list_head slabs_partial;	/* partial list first, better asm code */
+	struct list_head slabs_full;
+	struct list_head slabs_free;
+	unsigned long free_objects;
+	unsigned int free_limit;
+	unsigned int colour_next;	/* Per-node cache coloring */
+	spinlock_t list_lock;
+	struct array_cache *shared;	/* shared per node */
+	struct array_cache **alien;	/* on other nodes */
+	unsigned long next_reap;	/* updated without locking */
+	int free_touched;		/* updated without locking */
+};
+
+/*
+ * struct kmem_cache
+ *
+ * manages a cache.
+ */
+
+struct kmem_cache {
+/* 1) per-cpu data, touched during every alloc/free */
+	struct array_cache *array[NR_CPUS];
+	unsigned int batchcount;
+	unsigned int limit;
+	unsigned int shared;
+	unsigned int buffer_size;
+/* 2) touched by every alloc & free from the backend */
+	struct kmem_list3 *nodelists[MAX_NUMNODES];
+	unsigned int flags;	/* constant flags */
+	unsigned int num;	/* # of objs per slab */
+	spinlock_t spinlock;
+
+/* 3) cache_grow/shrink */
+	/* order of pgs per slab (2^n) */
+	unsigned int gfporder;
+
+	/* force GFP flags, e.g. GFP_DMA */
+	gfp_t gfpflags;
+
+	size_t colour;		/* cache colouring range */
+	unsigned int colour_off;	/* colour offset */
+	struct kmem_cache *slabp_cache;
+	unsigned int slab_size;
+	unsigned int dflags;	/* dynamic flags */
+
+	/* constructor func */
+	void (*ctor) (void *, struct kmem_cache *, unsigned long);
+
+	/* de-constructor func */
+	void (*dtor) (void *, struct kmem_cache *, unsigned long);
+
+/* 4) cache creation/removal */
+	const char *name;
+	struct list_head next;
+
+/* 5) statistics */
+	unsigned long grown;
+	unsigned long reaped;
+	unsigned long shrunk;
+#if SLAB_STATS
+	unsigned long num_active;
+	unsigned long num_allocations;
+	unsigned long high_mark;
+	unsigned long errors;
+	unsigned long max_freeable;
+	unsigned long node_allocs;
+	unsigned long node_frees;
+	atomic_t allochit;
+	atomic_t allocmiss;
+	atomic_t freehit;
+	atomic_t freemiss;
+#endif
+#if SLAB_DEBUG
+	/*
+	 * If debugging is enabled, then the allocator can add additional
+	 * fields and/or padding to every object. buffer_size contains the total
+	 * object size including these internal fields, the following two
+	 * variables contain the offset to the user object and its size.
+	 */
+	int obj_offset;
+	int obj_size;
+#endif
+#ifdef CONFIG_USER_RESOURCE
+	unsigned int		objuse;
+#endif
+};
+
+#define CFLGS_OFF_SLAB		(0x80000000UL)
+#define CFLGS_ENVIDS		(0x04000000UL)
+#define	OFF_SLAB(x)		((x)->flags & CFLGS_OFF_SLAB)
+#define ENVIDS(x)		((x)->flags & CFLGS_ENVIDS)
+#define kmem_mark_nocharge(c)	do { (c)->flags |= SLAB_NO_CHARGE; } while (0)
+
+struct slab;
+/* Functions for storing/retrieving the cachep and/or slab from the
+ * global 'mem_map'. These are used to find the slab an obj belongs to.
+ * With kfree(), these are used to find the cache which an obj belongs to.
+ */
+static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
+{
+	page->lru.next = (struct list_head *)cache;
+}
+
+static inline struct kmem_cache *page_get_cache(struct page *page)
+{
+	return (struct kmem_cache *)page->lru.next;
+}
+
+static inline void page_set_slab(struct page *page, struct slab *slab)
+{
+	page->lru.prev = (struct list_head *)slab;
+}
+
+static inline struct slab *page_get_slab(struct page *page)
+{
+	return (struct slab *)page->lru.prev;
+}
+
+static inline struct kmem_cache *virt_to_cache(const void *obj)
+{
+	/* Resolve the object's page, then fetch the cache recorded in it. */
+	return page_get_cache(virt_to_page(obj));
+}
+
+static inline struct slab *virt_to_slab(const void *obj)
+{
+	/* Resolve the object's page, then fetch the slab recorded in it. */
+	return page_get_slab(virt_to_page(obj));
+}
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/kmem_slab.h linux-2.6.16.46-0.12-027test011/include/linux/kmem_slab.h
--- linux-2.6.16.46-0.12.orig/include/linux/kmem_slab.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/kmem_slab.h	2007-08-28 17:35:29.000000000 +0400
@@ -0,0 +1,71 @@
+#ifndef __KMEM_SLAB_H__
+#define __KMEM_SLAB_H__
+
+/*
+ * kmem_bufctl_t:
+ *
+ * Bufctl's are used for linking objs within a slab
+ * linked offsets.
+ *
+ * This implementation relies on "struct page" for locating the cache &
+ * slab an object belongs to.
+ * This allows the bufctl structure to be small (one int), but limits
+ * the number of objects a slab (not a cache) can contain when off-slab
+ * bufctls are used. The limit is the size of the largest general cache
+ * that does not use off-slab slabs.
+ * For 32bit archs with 4 kB pages, this is 56.
+ * This is not serious, as it is only for large objects, when it is unwise
+ * to have too many per slab.
+ * Note: This limit can be raised by introducing a general cache whose size
+ * is less than 512 (PAGE_SIZE<<3), but greater than 256.
+ */
+
+typedef unsigned int kmem_bufctl_t;
+#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
+#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
+#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-2)
+
+/*
+ * struct slab
+ *
+ * Manages the objs in a slab. Placed either at the beginning of mem allocated
+ * for a slab, or allocated from a general cache.
+ * Slabs are chained into three lists: fully used, partial, fully free slabs.
+ */
+struct slab {
+	struct list_head list;
+	unsigned long colouroff;
+	void *s_mem;		/* including colour offset */
+	unsigned int inuse;	/* num of objs active in slab */
+	kmem_bufctl_t free;
+	unsigned short nodeid;
+};
+
+/*
+ * struct slab_rcu
+ *
+ * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
+ * arrange for kmem_freepages to be called via RCU.  This is useful if
+ * we need to approach a kernel structure obliquely, from its address
+ * obtained without the usual locking.  We can lock the structure to
+ * stabilize it and check it's still at the given address, only if we
+ * can be sure that the memory has not been meanwhile reused for some
+ * other kind of object (which our subsystem's lock might corrupt).
+ *
+ * rcu_read_lock before reading the address, then rcu_read_unlock after
+ * taking the spinlock within the structure expected at that address.
+ *
+ * We assume struct slab_rcu can overlay struct slab when destroying.
+ */
+struct slab_rcu {
+	struct rcu_head head;
+	struct kmem_cache *cachep;
+	void *addr;
+};
+
+static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
+{
+	return (kmem_bufctl_t *)&slabp[1];	/* address just past *slabp */
+}
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/kobject.h linux-2.6.16.46-0.12-027test011/include/linux/kobject.h
--- linux-2.6.16.46-0.12.orig/include/linux/kobject.h	2007-08-24 19:28:22.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/kobject.h	2007-08-28 17:35:31.000000000 +0400
@@ -45,6 +45,8 @@ enum kobject_action {
 	KOBJ_UMOUNT	= (__force kobject_action_t) 0x05,	/* umount event for block devices (broken) */
 	KOBJ_OFFLINE	= (__force kobject_action_t) 0x06,	/* device offline */
 	KOBJ_ONLINE	= (__force kobject_action_t) 0x07,	/* device online */
+	KOBJ_START	= (__force kobject_action_t) 0x08,	/* start subsystem */
+	KOBJ_STOP	= (__force kobject_action_t) 0x09,	/* stop subsystem */
 };
 
 struct kobject {
@@ -189,6 +191,9 @@ extern struct subsystem kernel_subsys;
 /* The global /sys/hypervisor/ subsystem  */
 extern struct subsystem hypervisor_subsys;
 
+extern struct subsystem class_obj_subsys;
+extern struct subsystem class_subsys;
+
 /**
  * Helpers for setting the kset of registered objects.
  * Often, a registered object belongs to a kset embedded in a 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/list.h linux-2.6.16.46-0.12-027test011/include/linux/list.h
--- linux-2.6.16.46-0.12.orig/include/linux/list.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/list.h	2007-08-28 17:35:29.000000000 +0400
@@ -336,6 +336,9 @@ static inline void list_splice_init(stru
 #define list_entry(ptr, type, member) \
 	container_of(ptr, type, member)
 
+#define list_first_entry(ptr, type, member) \
+	container_of((ptr)->next, type, member)
+
 /**
  * list_for_each	-	iterate over a list
  * @pos:	the &struct list_head to use as a loop counter.
diff -upr linux-2.6.16.46-0.12.orig/include/linux/lockd/lockd.h linux-2.6.16.46-0.12-027test011/include/linux/lockd/lockd.h
--- linux-2.6.16.46-0.12.orig/include/linux/lockd/lockd.h	2007-08-24 19:28:11.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/lockd/lockd.h	2007-08-28 17:35:36.000000000 +0400
@@ -57,6 +57,7 @@ struct nlm_host {
 	struct list_head	h_lockowners;	/* Lockowners for the client */
 	spinlock_t		h_lock;
 	struct nsm_handle *	h_nsmhandle;	/* NSM status handle */
+	struct ve_struct *	owner_env;
 };
 
 struct nsm_handle {
@@ -141,8 +142,11 @@ extern struct svc_procedure	nlmsvc_proce
 extern struct svc_procedure	nlmsvc_procedures4[];
 #endif
 extern struct svc_procedure	nsmsvc_procedures[];
-extern int			nlmsvc_grace_period;
-extern unsigned long		nlmsvc_timeout;
+
+#include <linux/ve_nfs.h>
+extern int			_nlmsvc_grace_period;
+extern unsigned long		_nlmsvc_timeout;
+
 extern int			nlm_max_hosts;
 extern int			nsm_use_hostnames;
 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/major.h linux-2.6.16.46-0.12-027test011/include/linux/major.h
--- linux-2.6.16.46-0.12.orig/include/linux/major.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/major.h	2007-08-28 17:35:31.000000000 +0400
@@ -165,4 +165,7 @@
 
 #define VIOTAPE_MAJOR		230
 
+#define UNNAMED_EXTRA_MAJOR		130
+#define UNNAMED_EXTRA_MAJOR_COUNT	120
+
 #endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/mm.h linux-2.6.16.46-0.12-027test011/include/linux/mm.h
--- linux-2.6.16.46-0.12.orig/include/linux/mm.h	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/mm.h	2007-08-28 17:35:33.000000000 +0400
@@ -600,16 +600,9 @@ struct page *shmem_nopage(struct vm_area
 int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
 struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
 					unsigned long addr);
-int shmem_lock(struct file *file, int lock, struct user_struct *user);
 #else
 #define shmem_nopage filemap_nopage
 
-static inline int shmem_lock(struct file *file, int lock,
-			     struct user_struct *user)
-{
-	return 0;
-}
-
 static inline int shmem_set_policy(struct vm_area_struct *vma,
 				   struct mempolicy *new)
 {
@@ -670,7 +663,9 @@ void free_pgd_range(struct mmu_gather **
 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
-			struct vm_area_struct *vma);
+		struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
+int __copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *vma,
+		      unsigned long addr, size_t size);
 int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
 			unsigned long size, pgprot_t prot);
 void unmap_mapping_range(struct address_space *mapping,
diff -upr linux-2.6.16.46-0.12.orig/include/linux/mm_types.h linux-2.6.16.46-0.12-027test011/include/linux/mm_types.h
--- linux-2.6.16.46-0.12.orig/include/linux/mm_types.h	2007-08-24 19:28:22.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/mm_types.h	2007-08-28 17:35:30.000000000 +0400
@@ -62,6 +62,12 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
+#ifdef CONFIG_USER_RESOURCE
+	union {
+		struct user_beancounter *page_ub;
+		struct page_beancounter *page_pb;
+	} bc;
+#endif
 };
 
 #endif /* _LINUX_MM_TYPES_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/mman.h linux-2.6.16.46-0.12-027test011/include/linux/mman.h
--- linux-2.6.16.46-0.12.orig/include/linux/mman.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/mman.h	2007-08-28 17:35:30.000000000 +0400
@@ -59,6 +59,9 @@ static inline unsigned long
 calc_vm_flag_bits(unsigned long flags)
 {
 	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
+#ifdef MAP_GROWSUP
+	       _calc_vm_trans(flags, MAP_GROWSUP,    VM_GROWSUP ) |
+#endif
 	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
 	       _calc_vm_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE) |
 	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    );
diff -upr linux-2.6.16.46-0.12.orig/include/linux/mount.h linux-2.6.16.46-0.12-027test011/include/linux/mount.h
--- linux-2.6.16.46-0.12.orig/include/linux/mount.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/mount.h	2007-08-28 17:35:31.000000000 +0400
@@ -47,6 +47,7 @@ struct vfsmount {
 	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct namespace *mnt_namespace; /* containing namespace */
 	int mnt_pinned;
+	unsigned owner;
 };
 
 static inline struct vfsmount *mntget(struct vfsmount *mnt)
diff -upr linux-2.6.16.46-0.12.orig/include/linux/msg.h linux-2.6.16.46-0.12-027test011/include/linux/msg.h
--- linux-2.6.16.46-0.12.orig/include/linux/msg.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/msg.h	2007-08-28 17:35:33.000000000 +0400
@@ -92,6 +92,8 @@ struct msg_queue {
 	struct list_head q_senders;
 };
 
+int sysvipc_walk_msg(int (*func)(int, struct msg_queue*, void *), void *arg);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_MSG_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/namei.h linux-2.6.16.46-0.12-027test011/include/linux/namei.h
--- linux-2.6.16.46-0.12.orig/include/linux/namei.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/namei.h	2007-08-28 17:35:31.000000000 +0400
@@ -48,12 +48,15 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
 #define LOOKUP_PARENT		16
 #define LOOKUP_NOALT		32
 #define LOOKUP_REVAL		64
+#define LOOKUP_STRICT		128	/* no symlinks or other filesystems */
+
 /*
  * Intent data
  */
 #define LOOKUP_OPEN		(0x0100)
 #define LOOKUP_CREATE		(0x0200)
 #define LOOKUP_ACCESS		(0x0400)
+#define LOOKUP_NOAREACHECK	(0x0800)	/* no area check on lookup */
 
 extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
 extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *));
diff -upr linux-2.6.16.46-0.12.orig/include/linux/namespace.h linux-2.6.16.46-0.12-027test011/include/linux/namespace.h
--- linux-2.6.16.46-0.12.orig/include/linux/namespace.h	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/namespace.h	2007-08-28 17:35:31.000000000 +0400
@@ -16,6 +16,8 @@ struct namespace {
 	int event;
 };
 
+extern struct rw_semaphore namespace_sem;
+
 extern int copy_namespace(int, struct task_struct *);
 extern void __put_namespace(struct namespace *namespace);
 extern struct namespace *dup_namespace(struct task_struct *, struct fs_struct *);
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netdevice.h linux-2.6.16.46-0.12-027test011/include/linux/netdevice.h
--- linux-2.6.16.46-0.12.orig/include/linux/netdevice.h	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/netdevice.h	2007-08-28 17:35:32.000000000 +0400
@@ -38,6 +38,7 @@
 #include <linux/device.h>
 #include <linux/percpu.h>
 #include <linux/dmaengine.h>
+#include <linux/ctype.h>
 
 struct divert_blk;
 struct vlan_group;
@@ -235,6 +236,11 @@ enum netdev_state_t
 	__LINK_STATE_QDISC_RUNNING,
 };
 
+struct netdev_bc {
+	struct user_beancounter *exec_ub, *owner_ub;
+};
+
+#define netdev_bc(dev)		(&(dev)->dev_bc)
 
 /*
  * This structure holds at boot time configured netdevice settings. They
@@ -313,6 +319,8 @@ struct net_device
 #define NETIF_F_MC_ALL		16384   /* trigger driver on every multicast
 	                                 * address been added/deleted
 				         */
+#define	NETIF_F_VIRTUAL		0x40000000 /* can be registered in ve */
+#define	NETIF_F_VENET		0x80000000 /* Device is VENET device */
 
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
@@ -447,6 +455,7 @@ struct net_device
 	enum { NETREG_UNINITIALIZED=0,
 	       NETREG_REGISTERING,	/* called register_netdevice */
 	       NETREG_REGISTERED,	/* completed register todo */
+	       NETREG_REGISTER_ERR,	/* register todo failed */
 	       NETREG_UNREGISTERING,	/* called unregister_netdevice */
 	       NETREG_UNREGISTERED,	/* completed unregister todo */
 	       NETREG_RELEASED,		/* called free_netdev */
@@ -516,8 +525,17 @@ struct net_device
 	struct divert_blk	*divert;
 #endif /* CONFIG_NET_DIVERT */
 
+	struct ve_struct	*owner_env; /* Owner VE of the interface */
+	struct netdev_bc	dev_bc;
+
 	/* class/net/name entry */
 	struct class_device	class_dev;
+
+#ifdef CONFIG_VE
+	/* List entry in global devices list to keep track of their names
+	 * assignment */
+	struct list_head	dev_global_list_entry;
+#endif
 };
 
 #define	NETDEV_ALIGN		32
@@ -553,10 +571,25 @@ struct packet_type {
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 
+extern struct net_device		templ_loopback_dev;
 extern struct net_device		loopback_dev;		/* The loopback */
+#if defined(CONFIG_VE) && defined(CONFIG_NET)
+#define loopback_dev	(*get_exec_env()->_loopback_dev)
+#define ve0_loopback	(*get_ve0()->_loopback_dev)
+#define dev_base	(get_exec_env()->_net_dev_base)
+#define visible_dev_head(x)	(&(x)->_net_dev_head)
+#define visible_dev_index_head(x) (&(x)->_net_dev_index_head)
+#else
 extern struct net_device		*dev_base;		/* All devices */
+#define ve0_loopback	loopback_dev
+#define visible_dev_head(x)	NULL
+#define visible_dev_index_head(x) NULL
+#endif
 extern rwlock_t				dev_base_lock;		/* Device list lock */
 
+struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env);
+struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env);
+
 extern int 			netdev_boot_setup_check(struct net_device *dev);
 extern unsigned long		netdev_boot_base(const char *prefix, int unit);
 extern struct net_device    *dev_getbyhwaddr(unsigned short type, char *hwaddr);
@@ -1020,6 +1053,18 @@ static inline int netif_needs_gso(struct
 		unlikely(skb->ip_summed != CHECKSUM_HW));
 }
 
+#if defined(CONFIG_VE) && defined(CONFIG_NET)
+static inline int ve_is_dev_movable(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_VIRTUAL) == 0;
+}
+#else /* !(CONFIG_VE && CONFIG_NET) */
+static inline int ve_is_dev_movable(struct net_device *dev)
+{
+	return 0;	/* never movable in this configuration */
+}
+#endif /* CONFIG_VE && CONFIG_NET */
+
 #endif /* __KERNEL__ */
 
 #endif	/* _LINUX_DEV_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter/nf_conntrack_ftp.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter/nf_conntrack_ftp.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter/nf_conntrack_ftp.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter/nf_conntrack_ftp.h	2007-08-28 17:35:32.000000000 +0400
@@ -32,13 +32,22 @@ struct ip_conntrack_expect;
 
 /* For NAT to hook in when we find a packet which describes what other
  * connection we should expect. */
-extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
+typedef unsigned int (*ip_nat_helper_ftp_hook)(struct sk_buff **pskb,
 				       enum ip_conntrack_info ctinfo,
 				       enum ip_ct_ftp_type type,
 				       unsigned int matchoff,
 				       unsigned int matchlen,
 				       struct ip_conntrack_expect *exp,
 				       u32 *seq);
+extern ip_nat_helper_ftp_hook ip_nat_ftp_hook;
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_nat_ftp_hook \
+	((ip_nat_helper_ftp_hook) \
+		(get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook))
+#else
+#define ve_ip_nat_ftp_hook	ip_nat_ftp_hook
+#endif
 #endif /* __KERNEL__ */
 
 #endif /* _NF_CONNTRACK_FTP_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter/x_tables.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter/x_tables.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter/x_tables.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter/x_tables.h	2007-08-28 17:35:32.000000000 +0400
@@ -80,12 +80,19 @@ struct xt_counters_info
 
 #ifdef __KERNEL__
 
+#include <linux/config.h>
 #include <linux/netdevice.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
 #include <linux/netfilter_ipv4/listhelp.h>
 
+#ifdef CONFIG_COMPAT
+#define COMPAT_TO_USER		1
+#define COMPAT_FROM_USER	-1
+#define COMPAT_CALC_SIZE	0
+#endif
+
 struct xt_match
 {
 	struct list_head list;
@@ -118,6 +125,10 @@ struct xt_match
 	/* Called when entry of this type deleted. */
 	void (*destroy)(void *matchinfo, unsigned int matchinfosize);
 
+#ifdef CONFIG_COMPAT
+	/* Called when userspace alignment differs from the kernel one */
+	int (*compat)(void *match, void **dstptr, int *size, int convert);
+#endif
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 };
@@ -154,6 +165,10 @@ struct xt_target
 	/* Called when entry of this type deleted. */
 	void (*destroy)(void *targinfo, unsigned int targinfosize);
 
+#ifdef CONFIG_COMPAT
+	/* Called when userspace alignment differs from the kernel one */
+	int (*compat)(void *target, void **dstptr, int *size, int convert);
+#endif
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 };
@@ -188,7 +203,7 @@ struct xt_table
 struct xt_table_info
 {
 	/* Size per table */
-	unsigned int size;
+	unsigned int size, alloc_size;
 	/* Number of entries: FIXME. --RR */
 	unsigned int number;
 	/* Initial number of entries. Needed for module usage count */
@@ -211,6 +226,10 @@ extern int xt_register_table(struct xt_t
 			     struct xt_table_info *bootstrap,
 			     struct xt_table_info *newinfo);
 extern void *xt_unregister_table(struct xt_table *table);
+extern struct xt_table *virt_xt_register_table(struct xt_table *table,
+			     struct xt_table_info *bootstrap,
+			     struct xt_table_info *newinfo);
+extern void *virt_xt_unregister_table(struct xt_table *table);
 
 extern struct xt_table_info *xt_replace_table(struct xt_table *table,
 					      unsigned int num_counters,
@@ -233,6 +252,34 @@ extern void xt_proto_fini(int af);
 extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
 extern void xt_free_table_info(struct xt_table_info *info);
 
+#ifdef CONFIG_COMPAT
+#include <net/compat.h>
+
+/* FIXME: this works only for 32-bit tasks;
+ * the whole approach needs to change so that alignment is calculated
+ * as a function of the current task's alignment */
+
+struct compat_xt_counters
+{
+	u_int32_t cnt[4];
+};
+
+struct compat_xt_counters_info
+{
+	char name[XT_TABLE_MAXNAMELEN];
+	compat_uint_t num_counters;
+	struct compat_xt_counters counters[0];
+};
+
+#define COMPAT_XT_ALIGN(s) (((s) + (__alignof__(struct compat_xt_counters)-1)) \
+		& ~(__alignof__(struct compat_xt_counters)-1))
+
+extern int ipt_match_align_compat(void *match, void **dstptr,
+		int *size, int off, int convert);
+extern int ipt_target_align_compat(void *target, void **dstptr,
+		int *size, int off, int convert);
+
+#endif /* CONFIG_COMPAT */
 #endif /* __KERNEL__ */
 
 #endif /* _X_TABLES_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter/xt_conntrack.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter/xt_conntrack.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter/xt_conntrack.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter/xt_conntrack.h	2007-08-28 17:35:29.000000000 +0400
@@ -5,6 +5,7 @@
 #ifndef _XT_CONNTRACK_H
 #define _XT_CONNTRACK_H
 
+#include <linux/config.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <linux/in.h>
 
@@ -60,4 +61,21 @@ struct xt_conntrack_info
 	/* Inverse flags */
 	u_int8_t invflags;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_xt_conntrack_info
+{
+	compat_uint_t statemask, statusmask;
+
+	struct ip_conntrack_tuple tuple[IP_CT_DIR_MAX];
+	struct in_addr sipmsk[IP_CT_DIR_MAX], dipmsk[IP_CT_DIR_MAX];
+
+	compat_ulong_t expires_min, expires_max;
+
+	/* Flags word */
+	u_int8_t flags;
+	/* Inverse flags */
+	u_int8_t invflags;
+};
+#endif
 #endif /*_XT_CONNTRACK_H*/
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter/xt_helper.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter/xt_helper.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter/xt_helper.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter/xt_helper.h	2007-08-28 17:35:29.000000000 +0400
@@ -1,8 +1,17 @@
 #ifndef _XT_HELPER_H
 #define _XT_HELPER_H
 
+#include <linux/config.h>
+
 struct xt_helper_info {
 	int invert;
 	char name[30];
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_xt_helper_info {
+	compat_int_t invert;
+	char name[30];
+};
+#endif
 #endif /* _XT_HELPER_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter/xt_limit.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter/xt_limit.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter/xt_limit.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter/xt_limit.h	2007-08-28 17:35:29.000000000 +0400
@@ -1,6 +1,8 @@
 #ifndef _XT_RATE_H
 #define _XT_RATE_H
 
+#include <linux/config.h>
+
 /* timings are in milliseconds. */
 #define XT_LIMIT_SCALE 10000
 
@@ -18,4 +20,19 @@ struct xt_rateinfo {
 	/* Ugly, ugly fucker. */
 	struct xt_rateinfo *master;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_xt_rateinfo {
+	u_int32_t avg;    /* Average secs between packets * scale */
+	u_int32_t burst;  /* Period multiplier for upper limit. */
+
+	/* Used internally by the kernel */
+	compat_ulong_t prev;
+	u_int32_t credit;
+	u_int32_t credit_cap, cost;
+
+	/* Ugly, ugly fucker. */
+	compat_uptr_t master;
+};
+#endif
 #endif /*_XT_RATE_H*/
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter/xt_state.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter/xt_state.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter/xt_state.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter/xt_state.h	2007-08-28 17:35:29.000000000 +0400
@@ -1,6 +1,8 @@
 #ifndef _XT_STATE_H
 #define _XT_STATE_H
 
+#include <linux/config.h>
+
 #define XT_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1))
 #define XT_STATE_INVALID (1 << 0)
 
@@ -10,4 +12,11 @@ struct xt_state_info
 {
 	unsigned int statemask;
 };
+
+#ifdef CONFIG_COMPAT
+struct compat_xt_state_info
+{
+	compat_uint_t statemask;
+};
+#endif
 #endif /*_XT_STATE_H*/
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter.h	2007-08-28 17:35:32.000000000 +0400
@@ -112,7 +112,13 @@ void nf_unregister_hook(struct nf_hook_o
 int nf_register_sockopt(struct nf_sockopt_ops *reg);
 void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
 
+#ifdef CONFIG_VE_IPTABLES
+#define ve_nf_hooks \
+       ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks))
+#else
 extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
+#define ve_nf_hooks nf_hooks
+#endif
 
 /* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will
  * disappear once iptables is replaced with pkttables.  Please DO NOT use them
@@ -190,7 +196,7 @@ static inline int nf_hook_thresh(int pf,
 	if (!cond)
 		return 1;
 #ifndef CONFIG_NETFILTER_DEBUG
-	if (list_empty(&nf_hooks[pf][hook]))
+	if (list_empty(&ve_nf_hooks[pf][hook]))
 		return 1;
 #endif
 	return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh);
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_conntrack.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_conntrack.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_conntrack.h	2007-08-28 17:35:33.000000000 +0400
@@ -71,6 +71,10 @@ do {									\
 
 struct ip_conntrack_helper;
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/ve.h>
+#endif
+
 struct ip_conntrack
 {
 	/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
@@ -122,6 +126,9 @@ struct ip_conntrack
 	/* Traversed often, so hopefully in different cacheline to top */
 	/* These are my tuples; original and reply */
 	struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
+#ifdef CONFIG_VE_IPTABLES
+        struct ve_struct *ct_owner_env;
+#endif
 };
 
 struct ip_conntrack_expect
@@ -232,7 +239,15 @@ extern void ip_conntrack_tcp_update(stru
 				    enum ip_conntrack_dir dir);
 
 /* Call me when a conntrack is destroyed. */
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_conntrack_destroyed	\
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_destroyed)
+#else
 extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
+#define ve_ip_conntrack_destroyed	ip_conntrack_destroyed
+#endif
+
 
 /* Fake conntrack entry for untracked connections */
 extern struct ip_conntrack ip_conntrack_untracked;
@@ -261,7 +276,7 @@ extern void ip_conntrack_proto_put(struc
 extern void ip_ct_remove_expectations(struct ip_conntrack *ct);
 
 extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *,
-					       struct ip_conntrack_tuple *);
+		struct ip_conntrack_tuple *, struct user_beancounter *);
 
 extern void ip_conntrack_free(struct ip_conntrack *ct);
 
@@ -270,6 +285,8 @@ extern void ip_conntrack_hash_insert(str
 extern struct ip_conntrack_expect *
 __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
 
+extern void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp);
+
 extern struct ip_conntrack_expect *
 ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
 
@@ -291,6 +308,7 @@ static inline int is_dying(struct ip_con
 }
 
 extern unsigned int ip_conntrack_htable_size;
+extern int ip_conntrack_disable_ve0;
  
 #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
 
@@ -341,6 +359,9 @@ ip_conntrack_event_cache(enum ip_conntra
 	struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
 	struct ip_conntrack_ecache *ecache;
 	
+	if (!ve_is_super(get_exec_env()))
+		return;
+
 	local_bh_disable();
 	ecache = &__get_cpu_var(ip_conntrack_ecache);
 	if (ct != ecache->ct)
@@ -352,7 +373,7 @@ ip_conntrack_event_cache(enum ip_conntra
 static inline void ip_conntrack_event(enum ip_conntrack_events event,
 				      struct ip_conntrack *ct)
 {
-	if (is_confirmed(ct) && !is_dying(ct))
+	if (is_confirmed(ct) && !is_dying(ct) && ve_is_super(get_exec_env()))
 		notifier_call_chain(&ip_conntrack_chain, event, ct);
 }
 
@@ -360,7 +381,8 @@ static inline void 
 ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
 			  struct ip_conntrack_expect *exp)
 {
-	notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
+	if (ve_is_super(get_exec_env()))
+		notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
 }
 #else /* CONFIG_IP_NF_CONNTRACK_EVENTS */
 static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_conntrack_core.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_conntrack_core.h	2007-08-28 17:35:32.000000000 +0400
@@ -3,7 +3,6 @@
 #include <linux/netfilter.h>
 
 #define MAX_IP_CT_PROTO 256
-extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 
 /* This header is used to share core functionality between the
    standalone connection tracking module, and the compatibility layer's use
@@ -54,8 +53,26 @@ static inline int ip_conntrack_confirm(s
 
 extern void ip_ct_unlink_expect(struct ip_conntrack_expect *exp);
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_ct_protos \
+	(get_exec_env()->_ip_conntrack->_ip_ct_protos)
+#define ve_ip_conntrack_hash	\
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_hash)
+#define ve_ip_conntrack_expect_list \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_expect_list)
+#define ve_ip_conntrack_vmalloc \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_vmalloc)
+#else
+extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 extern struct list_head *ip_conntrack_hash;
 extern struct list_head ip_conntrack_expect_list;
+#define ve_ip_ct_protos			ip_ct_protos
+#define ve_ip_conntrack_hash		ip_conntrack_hash
+#define ve_ip_conntrack_expect_list	ip_conntrack_expect_list
+#define ve_ip_conntrack_vmalloc		ip_conntrack_vmalloc
+#endif /* CONFIG_VE_IPTABLES */
+
 extern rwlock_t ip_conntrack_lock;
 #endif /* _IP_CONNTRACK_CORE_H */
 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_conntrack_helper.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_conntrack_helper.h	2007-08-28 17:35:32.000000000 +0400
@@ -31,6 +31,9 @@ struct ip_conntrack_helper
 extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
 extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
 
+extern int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *);
+extern void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
+
 /* Allocate space for an expectation: this is mandatory before calling 
    ip_conntrack_expect_related.  You will have to call put afterwards. */
 extern struct ip_conntrack_expect *
@@ -41,4 +44,5 @@ extern void ip_conntrack_expect_put(stru
 extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp);
 extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
 
+extern struct list_head helpers;
 #endif /*_IP_CONNTRACK_HELPER_H*/
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_conntrack_irc.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_conntrack_irc.h	2007-08-28 17:35:32.000000000 +0400
@@ -14,16 +14,26 @@
 #ifndef _IP_CONNTRACK_IRC_H
 #define _IP_CONNTRACK_IRC_H
 
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+
 /* This structure exists only once per master */
 struct ip_ct_irc_master {
 };
 
 #ifdef __KERNEL__
-extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
-				       enum ip_conntrack_info ctinfo,
-				       unsigned int matchoff,
-				       unsigned int matchlen,
-				       struct ip_conntrack_expect *exp);
+typedef unsigned int (*ip_nat_helper_irc_hook)(struct sk_buff **,
+		enum ip_conntrack_info, unsigned int, unsigned int,
+		struct ip_conntrack_expect *);
+
+extern ip_nat_helper_irc_hook ip_nat_irc_hook;
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_nat_irc_hook \
+	((ip_nat_helper_irc_hook) \
+		(get_exec_env()->_ip_conntrack->_ip_nat_irc_hook))
+#else
+#define ve_ip_nat_irc_hook	ip_nat_irc_hook
+#endif
 
 #define IRC_PORT	6667
 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_conntrack_protocol.h	2007-08-28 17:35:32.000000000 +0400
@@ -67,6 +67,7 @@ struct ip_conntrack_protocol
 /* Protocol registration. */
 extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
 extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto);
+
 /* Existing built-in protocols */
 extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
 extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
@@ -74,6 +75,41 @@ extern struct ip_conntrack_protocol ip_c
 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
 extern int ip_conntrack_protocol_tcp_init(void);
 
+#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_SYSCTL)
+#include <linux/sched.h>
+#define ve_ip_ct_tcp_timeouts \
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeouts)
+#define ve_ip_ct_udp_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout)
+#define ve_ip_ct_udp_timeout_stream \
+	(get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout_stream)
+#define ve_ip_ct_icmp_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_icmp_timeout)
+#define ve_ip_ct_generic_timeout \
+	(get_exec_env()->_ip_conntrack->_ip_ct_generic_timeout)
+#define ve_ip_ct_log_invalid	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_log_invalid)
+#define ve_ip_ct_tcp_timeout_max_retrans \
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeout_max_retrans)
+#define ve_ip_ct_tcp_loose	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_loose)
+#define ve_ip_ct_tcp_be_liberal	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_be_liberal)
+#define ve_ip_ct_tcp_max_retrans	\
+	(get_exec_env()->_ip_conntrack->_ip_ct_tcp_max_retrans)
+#else
+#define ve_ip_ct_tcp_timeouts		*tcp_timeouts
+#define ve_ip_ct_udp_timeout		ip_ct_udp_timeout
+#define ve_ip_ct_udp_timeout_stream	ip_ct_udp_timeout_stream
+#define ve_ip_ct_icmp_timeout		ip_ct_icmp_timeout
+#define ve_ip_ct_generic_timeout	ip_ct_generic_timeout
+#define ve_ip_ct_log_invalid		ip_ct_log_invalid
+#define ve_ip_ct_tcp_timeout_max_retrans ip_ct_tcp_timeout_max_retrans
+#define ve_ip_ct_tcp_loose		ip_ct_tcp_loose
+#define ve_ip_ct_tcp_be_liberal		ip_ct_tcp_be_liberal
+#define ve_ip_ct_tcp_max_retrans	ip_ct_tcp_max_retrans
+#endif
+
 /* Log invalid packets */
 extern unsigned int ip_ct_log_invalid;
 
@@ -85,10 +121,10 @@ extern int ip_ct_port_nfattr_to_tuple(st
 #ifdef CONFIG_SYSCTL
 #ifdef DEBUG_INVALID_PACKETS
 #define LOG_INVALID(proto) \
-	(ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW)
+	(ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW)
 #else
 #define LOG_INVALID(proto) \
-	((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \
+	((ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW) \
 	 && net_ratelimit())
 #endif
 #else
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_nat.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_nat.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_nat.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_nat.h	2007-08-28 17:35:33.000000000 +0400
@@ -1,5 +1,6 @@
 #ifndef _IP_NAT_H
 #define _IP_NAT_H
+#include <linux/config.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
 
@@ -72,10 +73,29 @@ extern unsigned int ip_nat_setup_info(st
 extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
 			     const struct ip_conntrack *ignored_conntrack);
 
+extern void ip_nat_hash_conntrack(struct ip_conntrack *conntrack);
+
 /* Calculate relative checksum. */
 extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv,
 				    u_int32_t newval,
 				    u_int16_t oldcheck);
+
+#ifdef CONFIG_COMPAT
+#include <net/compat.h>
+
+struct compat_ip_nat_range
+{
+	compat_uint_t flags;
+	u_int32_t min_ip, max_ip;
+	union ip_conntrack_manip_proto min, max;
+};
+
+struct compat_ip_nat_multi_range
+{
+	compat_uint_t rangesize;
+	struct compat_ip_nat_range range[1];
+};
+#endif
 #else  /* !__KERNEL__: iptables wants this to compile. */
 #define ip_nat_multi_range ip_nat_multi_range_compat
 #endif /*__KERNEL__*/
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_nat_rule.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_nat_rule.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_nat_rule.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_nat_rule.h	2007-08-28 17:35:32.000000000 +0400
@@ -6,7 +6,7 @@
 
 #ifdef __KERNEL__
 
-extern int ip_nat_rule_init(void) __init;
+extern int ip_nat_rule_init(void);
 extern void ip_nat_rule_cleanup(void);
 extern int ip_nat_rule_find(struct sk_buff **pskb,
 			    unsigned int hooknum,
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_tables.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv4/ip_tables.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv4/ip_tables.h	2007-08-28 17:35:32.000000000 +0400
@@ -16,6 +16,7 @@
 #define _IPTABLES_H
 
 #ifdef __KERNEL__
+#include <linux/config.h>
 #include <linux/if.h>
 #include <linux/types.h>
 #include <linux/in.h>
@@ -330,7 +331,7 @@ extern void ipt_init(void) __init;
 //#define ipt_register_table(tbl, repl) xt_register_table(AF_INET, tbl, repl)
 //#define ipt_unregister_table(tbl) xt_unregister_table(AF_INET, tbl)
 
-extern int ipt_register_table(struct ipt_table *table,
+extern struct ipt_table *ipt_register_table(struct ipt_table *table,
 			      const struct ipt_replace *repl);
 extern void ipt_unregister_table(struct ipt_table *table);
 
@@ -364,5 +365,70 @@ extern unsigned int ipt_do_table(struct 
 				 void *userdata);
 
 #define IPT_ALIGN(s) XT_ALIGN(s)
+
+#ifdef CONFIG_COMPAT
+#include <net/compat.h>
+
+struct compat_ipt_getinfo
+{
+	char name[IPT_TABLE_MAXNAMELEN];
+	compat_uint_t valid_hooks;
+	compat_uint_t hook_entry[NF_IP_NUMHOOKS];
+	compat_uint_t underflow[NF_IP_NUMHOOKS];
+	compat_uint_t num_entries;
+	compat_uint_t size;
+};
+
+struct compat_ipt_entry
+{
+	struct ipt_ip ip;
+	compat_uint_t nfcache;
+	u_int16_t target_offset;
+	u_int16_t next_offset;
+	compat_uint_t comefrom;
+	struct compat_xt_counters counters;
+	unsigned char elems[0];
+};
+
+struct compat_ipt_entry_match
+{
+	union {
+		struct {
+			u_int16_t match_size;
+			char name[IPT_FUNCTION_MAXNAMELEN];
+		} user;
+		struct {
+			u_int16_t match_size;
+			compat_uptr_t match;
+		} kernel;
+		u_int16_t match_size;
+	} u;
+	unsigned char data[0];
+};
+
+struct compat_ipt_entry_target
+{
+	union {
+		struct {
+			u_int16_t target_size;
+			char name[IPT_FUNCTION_MAXNAMELEN];
+		} user;
+		struct {
+			u_int16_t target_size;
+			compat_uptr_t target;
+		} kernel;
+		u_int16_t target_size;
+	} u;
+	unsigned char data[0];
+};
+
+#define COMPAT_IPT_ALIGN(s) 	COMPAT_XT_ALIGN(s)
+
+extern int ipt_match_align_compat(void *match, void **dstptr,
+		int *size, int off, int convert);
+extern int ipt_target_align_compat(void *target, void **dstptr,
+		int *size, int off, int convert);
+
+#endif /* CONFIG_COMPAT */
 #endif /*__KERNEL__*/
 #endif /* _IPTABLES_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv6/ip6_tables.h linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv6/ip6_tables.h
--- linux-2.6.16.46-0.12.orig/include/linux/netfilter_ipv6/ip6_tables.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/netfilter_ipv6/ip6_tables.h	2007-08-28 17:35:32.000000000 +0400
@@ -340,7 +340,7 @@ extern void ip6t_init(void) __init;
 #define ip6t_register_match(match) xt_register_match(AF_INET6, match)
 #define ip6t_unregister_match(match) xt_unregister_match(AF_INET6, match)
 
-extern int ip6t_register_table(struct ip6t_table *table,
+extern struct ip6t_table *ip6t_register_table(struct ip6t_table *table,
 			       const struct ip6t_replace *repl);
 extern void ip6t_unregister_table(struct ip6t_table *table);
 extern unsigned int ip6t_do_table(struct sk_buff **pskb,
diff -upr linux-2.6.16.46-0.12.orig/include/linux/nfcalls.h linux-2.6.16.46-0.12-027test011/include/linux/nfcalls.h
--- linux-2.6.16.46-0.12.orig/include/linux/nfcalls.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/nfcalls.h	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,192 @@
+/*
+ *  include/linux/nfcalls.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_NFCALLS_H
+#define _LINUX_NFCALLS_H
+
+#include <linux/rcupdate.h>
+
+#ifdef CONFIG_MODULES
+extern struct module no_module;
+
+#define DECL_KSYM_MODULE(name)				\
+	extern struct module *vz_mod_##name
+
+#define INIT_KSYM_MODULE(name)				\
+	struct module *vz_mod_##name = &no_module;	\
+	EXPORT_SYMBOL(vz_mod_##name)
+
+static inline void __vzksym_modresolve(struct module **modp, struct module *mod)
+{
+	/*
+	 * Make sure earlier pointer updates are visible before *modp is set:
+	 * 1. wmb() is here only for peace of mind
+	 *    (note, there is no rmb() in KSYMSAFECALL)
+	 * 2. synchronize_sched() guarantees that the updates are visible
+	 *    on all cpus and allows us to omit rmb() in KSYMSAFECALL
+	 */
+	wmb(); synchronize_sched();
+	*modp = mod;
+	/* just to be sure our change is visible as soon as possible */
+	wmb(); synchronize_sched();
+}
+
+static inline void __vzksym_modunresolve(struct module **modp)
+{
+	/*
+	 * try_module_get() in KSYMSAFECALL should fail at this moment since
+	 * THIS_MODULE is in unloading state (we should be called from fini),
+	 * no need to synchronize pointers/ve_module updates.
+	 */
+	*modp = &no_module;
+	/*
+	 * synchronize_sched() guarantees here that we see the
+	 * updated module pointer before the module really goes away
+	 */
+	synchronize_sched();
+}
+
+static inline int __vzksym_module_get(struct module *mod)
+{
+	/*
+	 * we want to avoid rmb(), so we rely on synchronize_sched() in
+	 * KSYMMODRESOLVE/KSYMMODUNRESOLVE plus smp_read_barrier_depends()
+	 */
+	smp_read_barrier_depends(); /* for module loading */
+	if (!try_module_get(mod))
+		return -EBUSY;
+
+	return 0;
+}
+
+static inline void __vzksym_module_put(struct module *mod)
+{
+	module_put(mod);
+}
+#else
+#define DECL_KSYM_MODULE(name)
+#define INIT_KSYM_MODULE(name)
+#define __vzksym_modresolve(modp, mod)
+#define __vzksym_modunresolve(modp)
+#define __vzksym_module_get(mod)			(0)
+#define __vzksym_module_put(mod)
+#endif
+
+#define __KSYMERRCALL(err, type, mod, name, args)	\
+({							\
+	type ret = (type)err;				\
+	if (!__vzksym_module_get(vz_mod_##mod))	{	\
+		if (vz_##name)				\
+			ret = ((*vz_##name)args); 	\
+		__vzksym_module_put(vz_mod_##mod);	\
+	}						\
+	ret;						\
+})
+#define __KSYMSAFECALL_VOID(mod, name, args)		\
+do {							\
+	if (!__vzksym_module_get(vz_mod_##mod)) {	\
+		if (vz_##name)				\
+			((*vz_##name)args); 		\
+		__vzksym_module_put(vz_mod_##mod);	\
+	}						\
+} while (0)
+
+#define DECL_KSYM_CALL(type, name, args)		\
+	extern type (*vz_##name) args
+#define INIT_KSYM_CALL(type, name, args)		\
+	type (*vz_##name) args;				\
+	EXPORT_SYMBOL(vz_##name)
+
+#define KSYMERRCALL(err, mod, name, args)		\
+	__KSYMERRCALL(err, int, mod, name, args)
+#define KSYMSAFECALL(type, mod, name, args)		\
+	__KSYMERRCALL(0, type, mod, name, args)
+#define KSYMSAFECALL_VOID(mod, name, args)		\
+	__KSYMSAFECALL_VOID(mod, name, args)
+#define KSYMREF(name)					vz_##name
+
+/* should be called _after_ KSYMRESOLVE's */
+#define KSYMMODRESOLVE(name)				\
+	__vzksym_modresolve(&vz_mod_##name, THIS_MODULE)
+#define KSYMMODUNRESOLVE(name)				\
+	__vzksym_modunresolve(&vz_mod_##name)
+
+#define KSYMRESOLVE(name)				\
+	vz_##name = &name
+#define KSYMUNRESOLVE(name)				\
+	vz_##name = NULL
+
+#if defined(CONFIG_VE)
+DECL_KSYM_MODULE(ip_tables);
+DECL_KSYM_MODULE(ip6_tables);
+DECL_KSYM_MODULE(iptable_filter);
+DECL_KSYM_MODULE(ip6table_filter);
+DECL_KSYM_MODULE(iptable_mangle);
+DECL_KSYM_MODULE(ip6table_mangle);
+DECL_KSYM_MODULE(ip6t_multiport);
+DECL_KSYM_MODULE(ip6t_REJECT);
+DECL_KSYM_MODULE(ip_conntrack);
+DECL_KSYM_MODULE(ip_conntrack_ftp);
+DECL_KSYM_MODULE(ip_conntrack_irc);
+DECL_KSYM_MODULE(xt_conntrack);
+DECL_KSYM_MODULE(ip_nat);
+DECL_KSYM_MODULE(iptable_nat);
+DECL_KSYM_MODULE(ip_nat_ftp);
+DECL_KSYM_MODULE(ip_nat_irc);
+
+struct sk_buff;
+
+DECL_KSYM_CALL(int, init_netfilter, (void));
+DECL_KSYM_CALL(int, init_iptables, (void));
+DECL_KSYM_CALL(int, init_ip6tables, (void));
+DECL_KSYM_CALL(int, init_iptable_filter, (void));
+DECL_KSYM_CALL(int, init_ip6table_filter, (void));
+DECL_KSYM_CALL(int, init_iptable_mangle, (void));
+DECL_KSYM_CALL(int, init_ip6table_mangle, (void));
+DECL_KSYM_CALL(int, init_ip6table_multiport, (void));
+DECL_KSYM_CALL(int, init_ip6table_REJECT, (void));
+DECL_KSYM_CALL(int, init_iptable_conntrack, (void));
+DECL_KSYM_CALL(int, init_iptable_ftp, (void));
+DECL_KSYM_CALL(int, init_iptable_irc, (void));
+DECL_KSYM_CALL(int, ip_nat_init, (void));
+DECL_KSYM_CALL(int, init_iptable_nat, (void));
+DECL_KSYM_CALL(int, init_iptable_nat_ftp, (void));
+DECL_KSYM_CALL(int, init_iptable_nat_irc, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat_irc, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
+DECL_KSYM_CALL(void, fini_iptable_nat, (void));
+DECL_KSYM_CALL(void, ip_nat_cleanup, (void));
+DECL_KSYM_CALL(void, fini_iptable_irc, (void));
+DECL_KSYM_CALL(void, fini_iptable_ftp, (void));
+DECL_KSYM_CALL(void, fini_iptable_conntrack, (void));
+DECL_KSYM_CALL(void, fini_iptable_filter, (void));
+DECL_KSYM_CALL(void, fini_ip6table_filter, (void));
+DECL_KSYM_CALL(void, fini_iptable_mangle, (void));
+DECL_KSYM_CALL(void, fini_ip6table_mangle, (void));
+DECL_KSYM_CALL(void, fini_ip6table_multiport, (void));
+DECL_KSYM_CALL(void, fini_ip6table_REJECT, (void));
+DECL_KSYM_CALL(void, fini_ip6tables, (void));
+DECL_KSYM_CALL(void, fini_iptables, (void));
+DECL_KSYM_CALL(void, fini_netfilter, (void));
+
+#include <linux/netfilter/x_tables.h>
+#endif /* CONFIG_VE */
+
+#if defined(CONFIG_VE_ETHDEV) || defined(CONFIG_VE_ETHDEV_MODULE)
+DECL_KSYM_MODULE(vzethdev);
+DECL_KSYM_CALL(int, veth_open, (struct net_device *dev));
+#endif
+
+#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
+DECL_KSYM_MODULE(vzmon);
+DECL_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
+#endif
+
+#endif /* _LINUX_NFCALLS_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/notifier.h linux-2.6.16.46-0.12-027test011/include/linux/notifier.h
--- linux-2.6.16.46-0.12.orig/include/linux/notifier.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/notifier.h	2007-08-28 17:35:33.000000000 +0400
@@ -27,8 +27,9 @@ extern int notifier_call_chain(struct no
 
 #define NOTIFY_DONE		0x0000		/* Don't care */
 #define NOTIFY_OK		0x0001		/* Suits me */
+#define NOTIFY_FAIL		0x0002		/* Reject */
 #define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
-#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)	/* Bad/Veto action	*/
+#define NOTIFY_BAD		(NOTIFY_STOP_MASK|NOTIFY_FAIL)	/* Bad/Veto action	*/
 /*
  * Clean way to return from the notifier and stop further calls.
  */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/page-flags.h linux-2.6.16.46-0.12-027test011/include/linux/page-flags.h
--- linux-2.6.16.46-0.12.orig/include/linux/page-flags.h	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/page-flags.h	2007-08-28 17:35:33.000000000 +0400
@@ -79,11 +79,20 @@
 #define PG_uncached		20	/* Page has been mapped as uncached */
 #define PG_truncate		21	/* Pagecache has been truncated/invalidated */
 #define PG_cantrunc		22	/* Pagecache may be able to be truncated/invalidated */
+#define PG_checkpointed		23
+
+/*
+ * the flags below are used by CONFIG_XEN and CONFIG_PAGE_STATES (s390 only);
+ * we may have them both off and enjoy our iterative migration
+ */
+
+#if 0
 #define PG_foreign		23	/* Page is owned by foreign allocator. */
 
 #define PG_state_change		24	/* HV page state is changing. */
 #define PG_discarded		25	/* HV page has been discarded. */
 #define PG_writable		26	/* HV page is mapped writable. */
+#endif
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -367,6 +376,7 @@ extern void __mod_page_state_offset(unsi
 #define SetPageUncached(page)	set_bit(PG_uncached, &(page)->flags)
 #define ClearPageUncached(page)	clear_bit(PG_uncached, &(page)->flags)
 
+#if 0
 #define PageForeign(page)	test_bit(PG_foreign, &(page)->flags)
 #define SetPageForeign(page, dtor) do {		\
 	set_bit(PG_foreign, &(page)->flags);	\
@@ -392,6 +402,9 @@ extern void __mod_page_state_offset(unsi
 #define PageWritable(page) test_bit(PG_writable, &(page)->flags)
 #define SetPageWritable(page) set_bit(PG_writable, &(page)->flags)
 #define ClearPageWritable(page) clear_bit(PG_writable, &(page)->flags)
+#endif
+
+#define ClearPageCheckpointed(page) clear_bit(PG_checkpointed, &(page)->flags)
 
 struct page;	/* forward declaration */
 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/page-states.h linux-2.6.16.46-0.12-027test011/include/linux/page-states.h
--- linux-2.6.16.46-0.12.orig/include/linux/page-states.h	2007-08-24 19:28:23.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/page-states.h	2007-08-28 17:35:30.000000000 +0400
@@ -87,29 +87,35 @@ static inline int page_make_stable(struc
 
 static inline void page_make_volatile(struct page *page, unsigned int offset)
 {
+#ifdef CONFIG_PAGE_STATES
 	extern void __page_make_volatile(struct page *, unsigned int offset);
 	if (!page_host_discards())
 		return;
 	if (likely(!test_bit(PG_discarded, &page->flags)))
 		__page_make_volatile(page, offset);
+#endif
 }
 
 static inline void page_check_writable(struct page *page, pte_t pte,
 				       unsigned int offset)
 {
+#ifdef CONFIG_PAGE_STATES
 	extern void __page_check_writable(struct page *, pte_t, unsigned int);
 	if (!page_host_discards() || !pte_write(pte) ||
 	    test_bit(PG_writable, &page->flags))
 		return;
 	__page_check_writable(page, pte, offset);
+#endif
 }
 
 static inline void page_reset_writable(struct page *page)
 {
+#ifdef CONFIG_PAGE_STATES
 	extern void __page_reset_writable(struct page *);
 	if (!page_host_discards() || !test_bit(PG_writable, &page->flags))
 		return;
 	__page_reset_writable(page);
+#endif
 }
 
 #endif /* _LINUX_PAGE_STATES_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/percpu.h linux-2.6.16.46-0.12-027test011/include/linux/percpu.h
--- linux-2.6.16.46-0.12.orig/include/linux/percpu.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/percpu.h	2007-08-28 17:35:30.000000000 +0400
@@ -36,16 +36,25 @@ struct percpu_data {
         (__typeof__(ptr))__p->ptrs[(cpu)];	\
 })
 
-extern void *__alloc_percpu(size_t size);
+#define static_percpu_ptr(sptr, sptrs) ({		\
+		int __i; /* private name: cannot capture a caller's 'i' */ \
+		for (__i = 0; __i < NR_CPUS; __i++)	\
+			(sptr)->ptrs[__i] = &(sptrs)[__i]; \
+		((void *)(~(unsigned long)(sptr)));	\
+	})
+
+extern void *__alloc_percpu_mask(size_t size, gfp_t gfp);
 extern void free_percpu(const void *);
 
 #else /* CONFIG_SMP */
 
 #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
 
-static inline void *__alloc_percpu(size_t size)
+#define static_percpu_ptr(sptr, sptrs)	(&(sptrs)[0])	/* !CONFIG_SMP: plain pointer to element 0 */
+
+static inline void *__alloc_percpu_mask(size_t size, gfp_t gfp)
 {
-	void *ret = kmalloc(size, GFP_KERNEL);
+	void *ret = kmalloc(size, gfp);
 	if (ret)
 		memset(ret, 0, size);
 	return ret;
@@ -58,6 +67,11 @@ static inline void free_percpu(const voi
 #endif /* CONFIG_SMP */
 
 /* Simple wrapper for the common case: zeros memory. */
-#define alloc_percpu(type)	((type *)(__alloc_percpu(sizeof(type))))
+#define __alloc_percpu(size)		\
+	__alloc_percpu_mask((size), GFP_KERNEL)
+#define alloc_percpu(type)		\
+	((type *)(__alloc_percpu_mask(sizeof(type), GFP_KERNEL)))
+#define alloc_percpu_atomic(type)	\
+	((type *)(__alloc_percpu_mask(sizeof(type), GFP_ATOMIC)))
 
 #endif /* __LINUX_PERCPU_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/pid.h linux-2.6.16.46-0.12-027test011/include/linux/pid.h
--- linux-2.6.16.46-0.12.orig/include/linux/pid.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/pid.h	2007-08-28 17:35:31.000000000 +0400
@@ -1,6 +1,18 @@
 #ifndef _LINUX_PID_H
 #define _LINUX_PID_H
 
+#define VPID_BIT	10	/* bit position tested by __is_virtual_pid() under CONFIG_VE */
+#define VPID_DIV	(1<<VPID_BIT)	/* mask with only bit VPID_BIT set */
+
+#ifdef CONFIG_VE
+#define __is_virtual_pid(pid)	((pid) & VPID_DIV)
+#define is_virtual_pid(pid)	\
+   (__is_virtual_pid(pid) || ((pid)==1 && !ve_is_super(get_exec_env())))
+#else
+#define __is_virtual_pid(pid)	0
+#define is_virtual_pid(pid)	0
+#endif
+
 enum pid_type
 {
 	PIDTYPE_PID,
@@ -15,6 +27,9 @@ struct pid
 	/* Try to keep pid_chain in the same cacheline as nr for find_pid */
 	int nr;
 	struct hlist_node pid_chain;
+#ifdef CONFIG_VE
+	int vnr;
+#endif
 	/* list of pids with the same nr, only one of them is in the hash */
 	struct list_head pid_list;
 };
@@ -40,16 +55,91 @@ extern int alloc_pidmap(void);
 extern void FASTCALL(free_pidmap(int));
 extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread);
 
-#define do_each_task_pid(who, type, task)				\
-	if ((task = find_task_by_pid_type(type, who))) {		\
+#ifndef CONFIG_VE
+
+#define vpid_to_pid(pid)	(pid)
+#define __vpid_to_pid(pid)	(pid)
+#define pid_type_to_vpid(type, pid)	(pid)
+#define __pid_type_to_vpid(type, pid)	(pid)	/* NOTE(review): CONFIG_VE branch declares _pid_type_to_vpid (one underscore) - confirm intended name */
+
+#define comb_vpid_to_pid(pid)	(pid)
+#define comb_pid_to_vpid(pid)	(pid)
+
+#else
+
+struct ve_struct;
+extern void free_vpid(int vpid, struct ve_struct *ve);
+extern int alloc_vpid(int pid, int vpid);
+extern int vpid_to_pid(int pid);
+extern int __vpid_to_pid(int pid);
+extern pid_t pid_type_to_vpid(int type, pid_t pid);
+extern pid_t _pid_type_to_vpid(int type, pid_t pid);
+
+static inline int comb_vpid_to_pid(int vpid)	/* sign-preserving wrapper around vpid_to_pid() */
+{
+	int pid = vpid;		/* vpid == 0 is returned unchanged */
+
+	if (vpid > 0) {
+		pid = vpid_to_pid(vpid);
+		if (unlikely(pid < 0))
+			return 0;	/* negative vpid_to_pid() result is reported as 0 */
+	} else if (vpid < 0) {
+		pid = vpid_to_pid(-vpid);	/* translate the magnitude ... */
+		if (unlikely(pid < 0))
+			return 0;	/* negative vpid_to_pid() result is reported as 0 */
+		pid = -pid;	/* ... then restore the sign */
+	}
+	return pid;
+}
+
+static inline int comb_pid_to_vpid(int pid)	/* sign-preserving wrapper around pid_type_to_vpid() */
+{
+	int vpid = pid;		/* pid == 0 is returned unchanged */
+
+	if (pid > 0) {
+		vpid = pid_type_to_vpid(PIDTYPE_PID, pid);	/* positive: looked up as PIDTYPE_PID */
+		if (unlikely(vpid < 0))
+			return 0;	/* negative lookup result is reported as 0 */
+	} else if (pid < 0) {
+		vpid = pid_type_to_vpid(PIDTYPE_PGID, -pid);	/* negative: magnitude looked up as PIDTYPE_PGID */
+		if (unlikely(vpid < 0))
+			return 0;	/* negative lookup result is reported as 0 */
+		vpid = -vpid;	/* restore the sign */
+	}
+	return vpid;
+}
+
+extern int glob_virt_pids;
+#endif
+
+#define do_each_task_pid_all(who, type, task)				\
+	if ((task = find_task_by_pid_type_all(type, who))) {		\
 		prefetch((task)->pids[type].pid_list.next);		\
 		do {
 
-#define while_each_task_pid(who, type, task)				\
+#define while_each_task_pid_all(who, type, task)			\
 		} while (task = pid_task((task)->pids[type].pid_list.next,\
 						type),			\
 			prefetch((task)->pids[type].pid_list.next),	\
 			hlist_unhashed(&(task)->pids[type].pid_chain));	\
 	}								\
 
+#ifndef CONFIG_VE
+#define __do_each_task_pid_ve(who, type, task, owner)			\
+		do_each_task_pid_all(who, type, task)
+#define __while_each_task_pid_ve(who, type, task, owner)		\
+		while_each_task_pid_all(who, type, task)
+#else /* CONFIG_VE */
+#define __do_each_task_pid_ve(who, type, task, owner)			\
+		do_each_task_pid_all(who, type, task)			\
+			if (ve_accessible(VE_TASK_INFO(task)->owner_env, owner))
+#define __while_each_task_pid_ve(who, type, task, owner)		\
+		while_each_task_pid_all(who, type, task)
+#endif /* CONFIG_VE */
+/* No ';' after the expansions: it would become the ve_accessible() if-body. */
+#define do_each_task_pid_ve(who, type, task)				\
+		__do_each_task_pid_ve(who, type, task, get_exec_env())
+#define while_each_task_pid_ve(who, type, task)				\
+		__while_each_task_pid_ve(who, type, task, get_exec_env())
+
 #endif /* _LINUX_PID_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/proc_fs.h linux-2.6.16.46-0.12-027test011/include/linux/proc_fs.h
--- linux-2.6.16.46-0.12.orig/include/linux/proc_fs.h	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/proc_fs.h	2007-08-28 17:35:31.000000000 +0400
@@ -4,7 +4,7 @@
 #include <linux/config.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
-#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
 #include <asm/atomic.h>
 
 /*
@@ -87,22 +87,28 @@ struct vmcore {
 
 extern struct proc_dir_entry proc_root;
 extern struct proc_dir_entry *proc_root_fs;
+extern struct file_system_type proc_fs_type;
+
+#ifdef CONFIG_VE
+#include <linux/sched.h>
+#define proc_net	(get_exec_env()->_proc_net)
+#define proc_net_stat	(get_exec_env()->_proc_net_stat)
+#else
 extern struct proc_dir_entry *proc_net;
 extern struct proc_dir_entry *proc_net_stat;
+#endif
 extern struct proc_dir_entry *proc_bus;
 extern struct proc_dir_entry *proc_root_driver;
 extern struct proc_dir_entry *proc_root_kcore;
 
-extern spinlock_t proc_subdir_lock;
-
 extern void proc_root_init(void);
 extern void proc_misc_init(void);
 
 struct mm_struct;
 
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
-struct dentry *proc_pid_unhash(struct task_struct *p);
-void proc_pid_flush(struct dentry *proc_dentry);
+void proc_pid_unhash(struct task_struct *p, struct dentry * [2]);
+void proc_pid_flush(struct dentry *proc_dentry[2]);
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
 unsigned long task_vsize(struct mm_struct *);
 int task_statm(struct mm_struct *, int *, int *, int *, int *);
@@ -110,7 +116,11 @@ char *task_mem(struct mm_struct *, char 
 
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
+extern struct proc_dir_entry *create_proc_glob_entry(const char *name,
+						mode_t mode,
+						struct proc_dir_entry *parent);
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
+extern void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent);
 
 extern struct vfsmount *proc_mnt;
 extern int proc_fill_super(struct super_block *,void *,int);
@@ -197,6 +207,15 @@ static inline struct proc_dir_entry *pro
 	return res;
 }
 
+static inline struct proc_dir_entry *proc_glob_fops_create(const char *name,
+	mode_t mode, struct file_operations *fops)
+{
+	struct proc_dir_entry *res = create_proc_glob_entry(name, mode, NULL);
+	if (res)
+		res->proc_fops = fops;
+	return res;
+}
+
 static inline void proc_net_remove(const char *name)
 {
 	remove_proc_entry(name,proc_net);
@@ -209,16 +228,21 @@ static inline void proc_net_remove(const
 #define proc_bus NULL
 
 #define proc_net_fops_create(name, mode, fops)  ({ (void)(mode), NULL; })
+#define proc_glob_fops_create(name, mode, fops)  ({ (void)(mode), NULL; })
 #define proc_net_create(name, mode, info)	({ (void)(mode), NULL; })
 static inline void proc_net_remove(const char *name) {}
 
-static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; }
-static inline void proc_pid_flush(struct dentry *proc_dentry) { }
+static inline void proc_pid_unhash(struct task_struct *p,
+		struct dentry *d[2]) { }	/* void, like the CONFIG_PROC_FS prototype */
+static inline void proc_pid_flush(struct dentry *proc_dentry[2]) { }
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
 	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
+static inline struct proc_dir_entry *create_proc_glob_entry(const char *name,
+	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
 
 #define remove_proc_entry(name, parent) do {} while (0)
+#define remove_proc_glob_entry(name, parent) do {} while (0)
 
 static inline struct proc_dir_entry *proc_symlink(const char *name,
 		struct proc_dir_entry *parent,const char *dest) {return NULL;}
@@ -240,6 +264,48 @@ extern struct proc_dir_entry proc_root;
 
 #endif /* CONFIG_PROC_FS */
 
+static inline struct proc_dir_entry *create_proc_entry_mod(const char *name,
+					mode_t mode,
+					struct proc_dir_entry *parent,
+					struct module *owner)
+{
+	struct proc_dir_entry *ent;
+
+	/*
+	 * Create the entry and set ent->owner under lock_kernel(): this
+	 * protects against proc_lookup() finding the freshly created entry
+	 * before its owner is set, which can lead to more module puts than gets.
+	 */
+	lock_kernel();
+	ent = create_proc_entry(name, mode, parent);
+	if (ent)
+		ent->owner = owner;
+	unlock_kernel();
+
+	return ent;
+}
+
+static inline struct proc_dir_entry *create_proc_glob_entry_mod(const char *name, 
+					mode_t mode,
+					struct proc_dir_entry *parent,
+					struct module *owner)
+{
+	struct proc_dir_entry *ent;
+
+	/*
+	 * Create the entry and set ent->owner under lock_kernel(): this
+	 * protects against proc_lookup() finding the freshly created entry
+	 * before its owner is set, which can lead to more module puts than gets.
+	 */
+	lock_kernel();
+	ent = create_proc_glob_entry(name, mode, parent);
+	if (ent)
+		ent->owner = owner;
+	unlock_kernel();
+
+	return ent;
+}
+
 #if !defined(CONFIG_PROC_KCORE)
 static inline void kclist_add(struct kcore_list *new, void *addr, size_t size)
 {
@@ -269,4 +335,18 @@ static inline struct proc_dir_entry *PDE
 	return PROC_I(inode)->pde;
 }
 
+static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
+{
+	if (de)
+		atomic_inc(&de->count);
+	return de;
+}
+
+extern void de_put(struct proc_dir_entry *);
+
+#define LPDE(inode)	(PROC_I((inode))->pde)
+#ifdef CONFIG_VE
+#define GPDE(inode)	(*(struct proc_dir_entry **)(&(inode)->i_pipe))
+#endif
+
 #endif /* _LINUX_PROC_FS_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/quota.h linux-2.6.16.46-0.12-027test011/include/linux/quota.h
--- linux-2.6.16.46-0.12.orig/include/linux/quota.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/quota.h	2007-08-28 17:35:33.000000000 +0400
@@ -37,7 +37,6 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <linux/spinlock.h>
 
 #define __DQUOT_VERSION__	"dquot_6.5.1"
 #define __DQUOT_NUM_VERSION__	6*10000+5*100+1
@@ -45,8 +44,6 @@
 typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
 typedef __u64 qsize_t;          /* Type in which we store sizes */
 
-extern spinlock_t dq_data_lock;
-
 /* Size of blocks in which are counted size limits */
 #define QUOTABLOCK_BITS 10
 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -133,6 +130,10 @@ struct if_dqinfo {
 
 #ifdef __KERNEL__
 
+#include <linux/spinlock.h>
+
+extern spinlock_t dq_data_lock;
+
 #include <linux/dqblk_xfs.h>
 #include <linux/dqblk_v1.h>
 #include <linux/dqblk_v2.h>
@@ -242,6 +243,8 @@ struct quota_format_ops {
 	int (*release_dqblk)(struct dquot *dquot);	/* Called when last reference to dquot is being dropped */
 };
 
+struct inode;
+struct iattr;
 /* Operations working with dquots */
 struct dquot_operations {
 	int (*initialize) (struct inode *, int);
@@ -256,9 +259,11 @@ struct dquot_operations {
 	int (*release_dquot) (struct dquot *);		/* Quota is going to be deleted from disk */
 	int (*mark_dirty) (struct dquot *);		/* Dquot is marked dirty */
 	int (*write_info) (struct super_block *, int);	/* Write of quota "superblock" */
+	int (*rename) (struct inode *, struct inode *, struct inode *);
 };
 
 /* Operations handling requests from userspace */
+struct v2_disk_dqblk;
 struct quotactl_ops {
 	int (*quota_on)(struct super_block *, int, int, char *);
 	int (*quota_off)(struct super_block *, int);
@@ -271,6 +276,10 @@ struct quotactl_ops {
 	int (*set_xstate)(struct super_block *, unsigned int, int);
 	int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 	int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
+#ifdef CONFIG_QUOTA_COMPAT
+	int (*get_quoti)(struct super_block *, int, unsigned int,
+			struct v2_disk_dqblk __user *);
+#endif
 };
 
 struct quota_format_type {
@@ -291,6 +300,10 @@ struct quota_info {
 	struct inode *files[MAXQUOTAS];		/* inodes of quotafiles */
 	struct mem_dqinfo info[MAXQUOTAS];	/* Information for each quota type */
 	struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+	struct vz_quota_master *vzdq_master;
+	int vzdq_count;
+#endif
 };
 
 /* Inline would be better but we need to dereference super_block which is not defined yet */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/quotaops.h linux-2.6.16.46-0.12-027test011/include/linux/quotaops.h
--- linux-2.6.16.46-0.12.orig/include/linux/quotaops.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/quotaops.h	2007-08-28 17:35:33.000000000 +0400
@@ -171,6 +171,19 @@ static __inline__ int DQUOT_TRANSFER(str
 	return 0;
 }
 
+static __inline__ int DQUOT_RENAME(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir)
+{
+	struct dquot_operations *q_op;
+
+	q_op = inode->i_sb->dq_op;
+	if (q_op && q_op->rename) {	/* the rename hook is optional */
+		if (q_op->rename(inode, old_dir, new_dir) == NO_QUOTA)
+			return 1;	/* hook reported NO_QUOTA */
+	}
+	return 0;	/* no hook, or hook returned something other than NO_QUOTA */
+}
+
 /* The following two functions cannot be called inside a transaction */
 #define DQUOT_SYNC(sb)	sync_dquots(sb, -1)
 
@@ -197,6 +210,7 @@ static __inline__ int DQUOT_OFF(struct s
 #define DQUOT_SYNC(sb)				do { } while(0)
 #define DQUOT_OFF(sb)				do { } while(0)
 #define DQUOT_TRANSFER(inode, iattr)		(0)
+#define DQUOT_RENAME(inode, old_dir, new_dir)	(0)
 static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 {
 	inode_add_bytes(inode, nr);
diff -upr linux-2.6.16.46-0.12.orig/include/linux/rmap.h linux-2.6.16.46-0.12-027test011/include/linux/rmap.h
--- linux-2.6.16.46-0.12.orig/include/linux/rmap.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/rmap.h	2007-08-28 17:35:33.000000000 +0400
@@ -74,6 +74,7 @@ void page_add_anon_rmap(struct page *, s
 void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
 void page_remove_rmap(struct page *);
+struct anon_vma *page_lock_anon_vma(struct page *page);
 
 /**
  * page_dup_rmap - duplicate pte mapping to a page
diff -upr linux-2.6.16.46-0.12.orig/include/linux/rtnetlink.h linux-2.6.16.46-0.12-027test011/include/linux/rtnetlink.h
--- linux-2.6.16.46-0.12.orig/include/linux/rtnetlink.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/rtnetlink.h	2007-08-28 17:35:30.000000000 +0400
@@ -905,6 +905,7 @@ struct tcamsg
 #ifdef __KERNEL__
 
 #include <linux/config.h>
+#include <linux/mutex.h>
 
 extern size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size);
 static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str)
@@ -1036,24 +1037,17 @@ __rta_reserve(struct sk_buff *skb, int a
 
 extern void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change);
 
-extern struct semaphore rtnl_sem;
-
-#define rtnl_shlock()		down(&rtnl_sem)
-#define rtnl_shlock_nowait()	down_trylock(&rtnl_sem)
-
-#define rtnl_shunlock()	do { up(&rtnl_sem); \
-		             if (rtnl && rtnl->sk_receive_queue.qlen) \
-				     rtnl->sk_data_ready(rtnl, 0); \
-		        } while(0)
-
+/* RTNL is used as a global lock for all changes to network configuration  */
 extern void rtnl_lock(void);
-extern int rtnl_lock_interruptible(void);
 extern void rtnl_unlock(void);
+extern int rtnl_trylock(void);
+
 extern void rtnetlink_init(void);
+extern void __rtnl_unlock(void);
 
 #define ASSERT_RTNL() do { \
-	if (unlikely(down_trylock(&rtnl_sem) == 0)) { \
-		up(&rtnl_sem); \
+	if (unlikely(rtnl_trylock())) { \
+		rtnl_unlock(); \
 		printk(KERN_ERR "RTNL: assertion failed at %s (%d)\n", \
 		       __FILE__,  __LINE__); \
 		dump_stack(); \
diff -upr linux-2.6.16.46-0.12.orig/include/linux/sched.h linux-2.6.16.46-0.12-027test011/include/linux/sched.h
--- linux-2.6.16.46-0.12.orig/include/linux/sched.h	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/sched.h	2007-08-28 17:35:34.000000000 +0400
@@ -38,7 +38,10 @@
 
 #include <linux/auxvec.h>	/* For AT_VECTOR_SIZE */
 
+#include <ub/ub_task.h>
+
 struct exec_domain;
+struct ve_struct;
 
 /*
  * cloning flags:
@@ -92,17 +95,36 @@ extern unsigned long avenrun[];		/* Load
 	load += n*(FIXED_1-exp); \
 	load >>= FSHIFT;
 
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
 extern unsigned long total_forks;
 extern int nr_threads;
 extern int last_pid;
 DECLARE_PER_CPU(unsigned long, process_counts);
 DECLARE_PER_CPU(struct runqueue, runqueues);
 extern int nr_processes(void);
+
+extern unsigned long nr_sleeping(void);
+extern unsigned long nr_stopped(void);
+extern unsigned long nr_zombie;
+extern atomic_t nr_dead;
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
 
+#ifdef CONFIG_VE
+struct ve_struct;
+extern unsigned long nr_running_ve(struct ve_struct *);
+extern unsigned long nr_iowait_ve(void);
+extern unsigned long nr_uninterruptible_ve(struct ve_struct *);
+#else
+#define nr_running_ve(ve)		0
+#define nr_iowait_ve()			0
+#define nr_uninterruptible_ve(ve)	0
+#endif
+
 #include <linux/time.h>
 #include <linux/param.h>
 #include <linux/resource.h>
@@ -191,6 +213,8 @@ extern cpumask_t nohz_cpu_mask;
 
 extern void show_state(void);
 extern void show_regs(struct pt_regs *);
+extern void smp_show_regs(struct pt_regs *, void *);
+extern void show_vsched(void);
 
 /*
  * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
@@ -334,6 +358,8 @@ struct mm_struct {
 	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
 
 	unsigned dumpable:2;
+	unsigned vps_dumpable:2;
+	unsigned oom_killed:1;
 	cpumask_t cpu_vm_mask;
 
 	/* Architecture-specific MM context */
@@ -350,6 +376,9 @@ struct mm_struct {
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
 	struct kioctx		*ioctx_list;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *mm_ub;
+#endif
 };
 
 struct sighand_struct {
@@ -366,6 +395,9 @@ static inline void sighand_free(struct s
 	call_rcu(&sp->rcu, sighand_free_cb);
 }
 
+#include <linux/ve.h>
+#include <linux/ve_task.h>
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -744,6 +776,8 @@ static inline void prefetch_stack(struct
 
 struct audit_context;		/* See audit.c */
 struct mempolicy;
+struct vcpu_scheduler;
+struct vcpu_struct;
 
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
@@ -757,6 +791,14 @@ struct task_struct {
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	int oncpu;
 #endif
+#ifdef CONFIG_SCHED_VCPU
+	struct vcpu_scheduler *vsched;
+	struct vcpu_struct *vcpu;
+
+	/* id's are saved to avoid locking (e.g. on vsched->id access) */
+	int vsched_id;
+	int vcpu_id;
+#endif
 	int prio, static_prio;
 	struct list_head run_list;
 	prio_array_t *array;
@@ -903,6 +945,11 @@ struct task_struct {
 
 	unsigned long ptrace_message;
 	siginfo_t *last_siginfo; /* For ptrace use.  */
+
+/* state tracking for suspend */
+	__u8	 pn_state;
+	__u8	 stopped_state:1;
+
 /*
  * current io wait handle: wait queue entry to use for io waits
  * If this thread is processing aio, this points at the waitqueue
@@ -941,6 +988,16 @@ struct task_struct {
 
 /* TASK_UNMAPPED_BASE */
 	unsigned long map_base;
+#ifdef CONFIG_USER_RESOURCE
+	struct task_beancounter task_bc;
+#endif
+#ifdef CONFIG_VE
+	struct ve_task_info ve_task_info;
+#endif
+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
+	unsigned long	magic;
+	struct inode	*ino;
+#endif
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
@@ -972,6 +1029,43 @@ static inline void put_task_struct(struc
 		call_rcu(&t->rcu, __put_task_struct_cb);
 }
 
+#ifndef CONFIG_VE
+#define set_pn_state(tsk, state)	do { } while(0)
+#define clear_pn_state(tsk)		do { } while(0)
+#define set_stop_state(tsk)		do { } while(0)
+#define clear_stop_state(tsk)		do { } while(0)
+#else
+#define PN_STOP_TF	1	/* was not in 2.6.8 */
+#define PN_STOP_TF_RT	2	/* was not in 2.6.8 */
+#define PN_STOP_ENTRY	3
+#define PN_STOP_FORK	4
+#define PN_STOP_VFORK	5
+#define PN_STOP_SIGNAL	6
+#define PN_STOP_EXIT	7
+#define PN_STOP_EXEC	8
+#define PN_STOP_LEAVE	9
+
+static inline void set_pn_state(struct task_struct *tsk, int state)
+{
+	tsk->pn_state = state;
+}
+
+static inline void clear_pn_state(struct task_struct *tsk)
+{
+	tsk->pn_state = 0;
+}
+
+static inline void set_stop_state(struct task_struct *tsk)
+{
+	tsk->stopped_state = 1;
+}
+
+static inline void clear_stop_state(struct task_struct *tsk)
+{
+	tsk->stopped_state = 0;
+}
+#endif
+
 /*
  * Per process flags
  */
@@ -987,7 +1081,7 @@ static inline void put_task_struct(struc
 #define PF_MEMALLOC	0x00000800	/* Allocating memory */
 #define PF_FLUSHER	0x00001000	/* responsible for disk writeback */
 #define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
-#define PF_FREEZE	0x00004000	/* this task is being frozen for suspend now */
+#define PF_EXIT_RESTART	0x00004000	/* do_exit() restarted, see do_exit() */
 #define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
@@ -1041,6 +1135,21 @@ static inline int set_cpus_allowed(task_
 extern unsigned long long sched_clock(void);
 extern unsigned long long current_sched_time(const task_t *current_task);
 
+static inline unsigned long cycles_to_clocks(cycles_t cycles)
+{
+	extern unsigned long cycles_per_clock;
+	do_div(cycles, cycles_per_clock);
+	return cycles;
+}
+
+static inline u64 cycles_to_jiffies(cycles_t cycles)
+{
+	extern unsigned long cycles_per_jiffy;
+	do_div(cycles, cycles_per_jiffy);
+	return cycles;
+}
+
+
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
 extern void sched_exec(void);
@@ -1089,6 +1198,7 @@ struct prio_array {
  * (such as the load balancing or the thread migration code), lock
  * acquire operations must be ordered by ascending &runqueue.
  */
+typedef struct vcpu_struct *vcpu_t;
 struct runqueue {
 	spinlock_t lock;
 
@@ -1110,9 +1220,12 @@ struct runqueue {
 	 */
 	unsigned long nr_uninterruptible;
 
+	unsigned long nr_sleeping;
+	unsigned long nr_stopped;
+
 	unsigned long expired_timestamp;
 	unsigned long long timestamp_last_tick;
-	task_t *curr, *idle;
+	task_t *curr;
 	struct mm_struct *prev_mm;
 	prio_array_t *active, *expired, arrays[2];
 	int best_expired_prio;
@@ -1123,12 +1236,13 @@ struct runqueue {
 
 	/* For active balancing */
 	int active_balance;
-	int push_cpu;
+#endif
+	vcpu_t push_cpu;
 
 	task_t *migration_thread;
+	int migration_thread_init;
 	struct list_head migration_queue;
 	int cpu;
-#endif
 
 #ifdef CONFIG_SCHEDSTATS
 	/* latency stats */
@@ -1176,12 +1290,222 @@ extern struct task_struct init_task;
 
 extern struct   mm_struct init_mm;
 
-#define find_task_by_pid(nr)	find_task_by_pid_type(PIDTYPE_PID, nr)
-extern struct task_struct *find_task_by_pid_type(int type, int pid);
+#define find_task_by_pid_all(nr)	\
+		find_task_by_pid_type_all(PIDTYPE_PID, nr)
+extern struct task_struct *find_task_by_pid_type_all(int type, int pid);
 extern void set_special_pids(pid_t session, pid_t pgrp);
 extern void __set_special_pids(pid_t session, pid_t pgrp);
 
+#ifndef CONFIG_VE
+#define find_task_by_pid_ve find_task_by_pid_all
+
+#define ve_is_super(env)			1
+#define ve_accessible(target, owner)		1
+#define ve_accessible_strict(target, owner)	1
+#define ve_accessible_veid(target, owner)		1
+#define ve_accessible_strict_veid(target, owner)	1
+
+#define VEID(envid)				0
+
+static inline pid_t virt_pid(struct task_struct *tsk)
+{
+	return tsk->pid;
+}
+
+static inline pid_t virt_tgid(struct task_struct *tsk)
+{
+	return tsk->tgid;
+}
+
+static inline pid_t virt_pgid(struct task_struct *tsk)
+{
+	return tsk->signal->pgrp;
+}
+
+static inline pid_t virt_sid(struct task_struct *tsk)
+{
+	return tsk->signal->session;
+}
+
+#define get_task_pid_ve(tsk, ve)	get_task_pid(tsk)
+
+static inline pid_t get_task_pid(struct task_struct *tsk)
+{
+	return tsk->pid;
+}
+
+static inline pid_t get_task_tgid(struct task_struct *tsk)
+{
+	return tsk->tgid;
+}
+
+static inline pid_t get_task_pgid(struct task_struct *tsk)
+{
+	return tsk->signal->pgrp;
+}
+
+static inline pid_t get_task_sid(struct task_struct *tsk)
+{
+	return tsk->signal->session;
+}
+
+static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
+{
+}
+
+static inline pid_t get_task_ppid(struct task_struct *p)
+{
+	return pid_alive(p) ? p->group_leader->real_parent->tgid : 0;
+}
+
+#else	/* CONFIG_VE */
+
+#include <asm/current.h>
+#include <linux/ve.h>
+
+#define find_task_by_pid_ve(nr)	\
+		find_task_by_pid_type_ve(PIDTYPE_PID, nr)
+
+extern struct task_struct *find_task_by_pid_type_ve(int type, int pid);
+
+#define VEID(envid)	((envid)->veid)
+
+#define ve_is_super(env) ((env) == get_ve0())
+#define ve_accessible_strict(target, owner)	((target) == (owner))
+static inline int ve_accessible(struct ve_struct *target,
+				struct ve_struct *owner) {
+	return ve_is_super(owner) || ve_accessible_strict(target, owner);
+}
+
+#define ve_accessible_strict_veid(target, owner) ((target) == (owner))
+static inline int ve_accessible_veid(envid_t target, envid_t owner)
+{
+	return get_ve0()->veid == owner ||
+	       ve_accessible_strict_veid(target, owner);
+}
+
+static inline pid_t virt_pid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_PID].vnr;
+}
+
+static inline pid_t virt_tgid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_TGID].vnr;
+}
+
+static inline pid_t virt_pgid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_PGID].vnr;
+}
+
+static inline pid_t virt_sid(struct task_struct *tsk)
+{
+	return tsk->pids[PIDTYPE_SID].vnr;
+}
+
+static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *env)
+{
+	return ve_is_super(env) ? tsk->pid : virt_pid(tsk);
+}
+
+static inline pid_t get_task_pid(struct task_struct *tsk)
+{
+	return get_task_pid_ve(tsk, get_exec_env());
+}
+
+static inline pid_t get_task_tgid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->tgid : virt_tgid(tsk);
+}
+
+static inline pid_t get_task_pgid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->signal->pgrp : virt_pgid(tsk);
+}
+
+static inline pid_t get_task_sid(struct task_struct *tsk)
+{
+	return ve_is_super(get_exec_env()) ? tsk->signal->session : virt_sid(tsk);
+}
+
+static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_PID].vnr = pid;
+}
+
+static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_TGID].vnr = pid;
+}
+
+static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_PGID].vnr = pid;
+}
+
+static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
+{
+	tsk->pids[PIDTYPE_SID].vnr = pid;
+}
+
+static inline pid_t get_task_ppid(struct task_struct *p)
+{
+	struct task_struct *parent;
+	struct ve_struct *env;
+
+	if (!pid_alive(p))
+		return 0;
+	env = get_exec_env();
+	if (get_task_pid_ve(p, env) == 1)	/* task seen as pid 1 from env reports no parent */
+		return 0;
+	parent = p->group_leader->real_parent;
+	return ve_accessible(VE_TASK_INFO(parent)->owner_env, env) ?
+		get_task_tgid(parent) : 1;	/* parent not accessible from env: report 1 */
+}
+
+void ve_sched_get_cpu_stat(struct ve_struct *envid, cycles_t *idle,
+				cycles_t *strv, unsigned int cpu);
+void ve_sched_attach(struct ve_struct *envid);
+
+#endif	/* CONFIG_VE */
+
+
+#ifdef CONFIG_VE
+extern cycles_t __ve_sched_get_idle_time(struct ve_struct *ve, int cpu);
+extern cycles_t ve_sched_get_iowait_time(int cpu);
+#else
+#define __ve_sched_get_idle_time(ve, cpu)	 0
+#define ve_sched_get_iowait_time(cpu)		0
+#endif
+
+#define ve_sched_get_idle_time(cpu)	\
+	__ve_sched_get_idle_time(get_exec_env(), cpu)
+
+#ifdef CONFIG_SCHED_VCPU
+struct vcpu_scheduler;
+extern void fastcall vsched_cpu_online_map(struct vcpu_scheduler *sched,
+		cpumask_t *mask);
+#else
+#define vsched_cpu_online_map(vsched, mask)     do {    \
+			*(mask) = cpu_online_map; /* !CONFIG_SCHED_VCPU: vsched is ignored */ \
+	} while (0)
+#endif
+
 /* per-UID process charging. */
+extern int set_user(uid_t new_ruid, int dumpclear);
 extern struct user_struct * alloc_uid(uid_t);
 static inline struct user_struct *get_uid(struct user_struct *u)
 {
@@ -1199,7 +1523,7 @@ extern int FASTCALL(wake_up_state(struct
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
 extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
 						unsigned long clone_flags));
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined (CONFIG_SCHED_VCPU)
  extern void kick_process(struct task_struct *tsk);
 #else
  static inline void kick_process(struct task_struct *tsk) { }
@@ -1317,12 +1641,19 @@ extern task_t *child_reaper;
 
 extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
+extern long do_fork_pid(unsigned long clone_flags,
+			unsigned long stack_start,
+			struct pt_regs *regs,
+			unsigned long stack_size,
+			int __user *parent_tidptr,
+			int __user *child_tidptr,
+			long pid0);
 task_t *fork_idle(int);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
 extern void get_task_comm(char *to, struct task_struct *tsk);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined (CONFIG_SCHED_VCPU)
 extern void wait_task_inactive(task_t * p);
 #else
 #define wait_task_inactive(p)	do { } while (0)
@@ -1343,22 +1674,100 @@ extern void wait_task_inactive(task_t * 
 	add_parent(p, (p)->parent);				\
 	} while (0)
 
-#define next_task(p)	list_entry((p)->tasks.next, struct task_struct, tasks)
-#define prev_task(p)	list_entry((p)->tasks.prev, struct task_struct, tasks)
+#define next_task_all(p)	list_entry((p)->tasks.next, struct task_struct, tasks)
+#define prev_task_all(p)	list_entry((p)->tasks.prev, struct task_struct, tasks)
 
-#define for_each_process(p) \
-	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
+#define for_each_process_all(p) \
+	for (p = &init_task ; (p = next_task_all(p)) != &init_task ; )
 
 /*
  * Careful: do_each_thread/while_each_thread is a double loop so
  *          'break' will not work as expected - use goto instead.
  */
-#define do_each_thread(g, t) \
-	for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
+#define do_each_thread_all(g, t) \
+	for (g = t = &init_task ; (g = t = next_task_all(g)) != &init_task ; ) do
 
-#define while_each_thread(g, t) \
+#define while_each_thread_all(g, t) \
 	while ((t = next_thread(t)) != g)
 
+#ifndef CONFIG_VE
+
+#define SET_VE_LINKS(p)
+#define REMOVE_VE_LINKS(p)
+#define for_each_process_ve(p)		for_each_process_all(p)
+#define do_each_thread_ve(g, t)		do_each_thread_all(g, t)
+#define while_each_thread_ve(g, t)	while_each_thread_all(g, t)
+#define first_task_ve()			next_task_ve(&init_task)
+#define __first_task_ve(owner)		next_task_ve(&init_task)
+#define __next_task_ve(owner, p)	next_task_ve(p)
+#define next_task_ve(p)	({ /* statement expr: p is evaluated only once */ \
+	task_t *__n = next_task_all(p); __n != &init_task ? __n : NULL; })
+
+#else	/* CONFIG_VE */
+
+#define SET_VE_LINKS(p)							\
+	do {								\
+		if (thread_group_leader(p))				\
+			list_add_tail(&VE_TASK_INFO(p)->vetask_list,	\
+					&VE_TASK_INFO(p)->owner_env->vetask_lh); \
+	} while (0)
+
+#define REMOVE_VE_LINKS(p)						\
+	do {								\
+		if (thread_group_leader(p))				\
+			list_del(&VE_TASK_INFO(p)->vetask_list);	\
+	} while(0)
+
+static inline task_t* __first_task_ve(struct ve_struct *ve)
+{
+	task_t *tsk;
+
+	if (unlikely(ve_is_super(ve))) {
+		tsk = next_task_all(&init_task);
+		if (tsk == &init_task)
+			tsk = NULL;
+	} else {
+		/* could probably return ve->init_entry, but this is clearer */
+		BUG_ON(list_empty(&ve->vetask_lh));
+		tsk = VE_TASK_LIST_2_TASK(ve->vetask_lh.next);
+	}
+	return tsk;
+}
+
+static inline task_t* __next_task_ve(struct ve_struct *ve, task_t *tsk)
+{
+	if (unlikely(ve_is_super(ve))) {
+		tsk = next_task_all(tsk);
+		if (tsk == &init_task)
+			tsk = NULL;
+	} else {
+		struct list_head *tmp;
+
+		BUG_ON(VE_TASK_INFO(tsk)->owner_env != ve);
+		tmp = VE_TASK_INFO(tsk)->vetask_list.next;
+		if (tmp == &ve->vetask_lh)
+			tsk = NULL;
+		else
+			tsk = VE_TASK_LIST_2_TASK(tmp);
+	}
+	return tsk;
+}
+
+#define first_task_ve()	__first_task_ve(get_exec_env())
+#define next_task_ve(p)	__next_task_ve(get_exec_env(), p)
+/* no one uses prev_task_ve(), copy next_task_ve() if needed */
+
+#define for_each_process_ve(p) \
+	for (p = first_task_ve(); p != NULL ; p = next_task_ve(p))
+
+#define do_each_thread_ve(g, t) \
+	for (g = t = first_task_ve() ; g != NULL; g = t = next_task_ve(g)) do
+
+#define while_each_thread_ve(g, t) \
+	while ((t = next_thread(t)) != g)
+
+#endif	/* CONFIG_VE */
+
 extern task_t * FASTCALL(next_thread(const task_t *p));
 
 #define thread_group_leader(p)	(p->pid == p->tgid)
@@ -1504,28 +1913,63 @@ extern void signal_wake_up(struct task_s
  */
 #ifdef CONFIG_SMP
 
-static inline unsigned int task_cpu(const struct task_struct *p)
+static inline unsigned int task_pcpu(const struct task_struct *p)
 {
 	return task_thread_info(p)->cpu;
 }
 
-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu)
 {
 	task_thread_info(p)->cpu = cpu;
 }
 
 #else
 
+static inline unsigned int task_pcpu(const struct task_struct *p)
+{
+	return 0;
+}
+
+static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu)
+{
+}
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_SCHED_VCPU
+
+static inline unsigned int task_vsched_id(const struct task_struct *p)
+{
+	return p->vsched_id;
+}
+
 static inline unsigned int task_cpu(const struct task_struct *p)
 {
+	return p->vcpu_id;
+}
+
+extern void set_task_cpu(struct task_struct *p, unsigned int vcpu);
+extern int vcpu_online(int cpu);
+
+#else
+
+static inline unsigned int task_vsched_id(const struct task_struct *p)
+{
 	return 0;
 }
 
+static inline unsigned int task_cpu(const struct task_struct *p)
+{
+	return task_pcpu(p);
+}
+
 static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
+	set_task_pcpu(p, cpu);
 }
 
-#endif /* CONFIG_SMP */
+#define vcpu_online(cpu)	cpu_online(cpu)
+#endif /* CONFIG_SCHED_VCPU */
 
 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 extern void arch_pick_mmap_layout(struct mm_struct *mm);
@@ -1553,20 +1997,12 @@ static inline int frozen(struct task_str
 }
 
 /*
- * Check if there is a request to freeze a process
- */
-static inline int freezing(struct task_struct *p)
-{
-	return p->flags & PF_FREEZE;
-}
-
-/*
  * Request that a process be frozen
  * FIXME: SMP problem. We may not modify other process' flags!
  */
 static inline void freeze(struct task_struct *p)
 {
-	p->flags |= PF_FREEZE;
+	set_tsk_thread_flag(p, TIF_FREEZE);
 }
 
 /*
@@ -1587,35 +2023,44 @@ static inline int thaw_process(struct ta
  */
 static inline void frozen_process(struct task_struct *p)
 {
-	p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN;
+	clear_tsk_thread_flag(p, TIF_FREEZE);
+	p->flags |= PF_FROZEN;
 }
 
-extern void refrigerator(void);
 extern int freeze_processes(void);
 extern void thaw_processes(void);
 
-static inline int try_to_freeze(void)
-{
-	if (freezing(current)) {
-		refrigerator();
-		return 1;
-	} else
-		return 0;
-}
 #else
 static inline int frozen(struct task_struct *p) { return 0; }
-static inline int freezing(struct task_struct *p) { return 0; }
 static inline void freeze(struct task_struct *p) { BUG(); }
 static inline int thaw_process(struct task_struct *p) { return 1; }
 static inline void frozen_process(struct task_struct *p) { BUG(); }
 
-static inline void refrigerator(void) {}
 static inline int freeze_processes(void) { BUG(); return 0; }
 static inline void thaw_processes(void) {}
 
-static inline int try_to_freeze(void) { return 0; }
-
 #endif /* CONFIG_PM */
+
+extern void refrigerator(void);
+
+/*
+ * Check if there is a request to freeze a process
+ */
+static inline int freezing(struct task_struct *p)
+{
+	return test_tsk_thread_flag(p, TIF_FREEZE);
+}
+
+static inline int try_to_freeze(void)
+{
+	/* Enter the refrigerator only if a freeze of current is pending. */
+	if (!freezing(current))
+		return 0;
+	refrigerator();
+	return 1;
+}
+
+
 #endif /* __KERNEL__ */
 
 #endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/sem.h linux-2.6.16.46-0.12-027test011/include/linux/sem.h
--- linux-2.6.16.46-0.12.orig/include/linux/sem.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/sem.h	2007-08-28 17:35:33.000000000 +0400
@@ -155,6 +155,9 @@ static inline void exit_sem(struct task_
 }
 #endif
 
+int sysvipc_walk_sem(int (*func)(int, struct sem_array*, void *), void *arg);
+int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SEM_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/shm.h linux-2.6.16.46-0.12-027test011/include/linux/shm.h
--- linux-2.6.16.46-0.12.orig/include/linux/shm.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/shm.h	2007-08-28 17:35:33.000000000 +0400
@@ -86,6 +86,7 @@ struct shmid_kernel /* private to the ke
 	pid_t			shm_cprid;
 	pid_t			shm_lprid;
 	struct user_struct	*mlock_user;
+	struct ipc_ids		*_shm_ids;
 };
 
 /* shm_mode upper byte flags */
@@ -104,6 +105,9 @@ static inline long do_shmat(int shmid, c
 }
 #endif
 
+int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg);
+struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SHM_H_ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/shmem_fs.h linux-2.6.16.46-0.12-027test011/include/linux/shmem_fs.h
--- linux-2.6.16.46-0.12.orig/include/linux/shmem_fs.h	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/shmem_fs.h	2007-08-28 17:35:31.000000000 +0400
@@ -23,6 +23,9 @@ struct shmem_inode_info {
 	struct posix_acl	*i_acl;
 	struct posix_acl	*i_default_acl;
 #endif
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter	*shmi_ub;
+#endif
 };
 
 struct shmem_sb_info {
@@ -57,4 +60,6 @@ static inline void shmem_acl_destroy_ino
 }
 #endif  /* CONFIG_TMPFS_POSIX_ACL */
 
+extern struct file_system_type tmpfs_fs_type;
+
 #endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/signal.h linux-2.6.16.46-0.12-027test011/include/linux/signal.h
--- linux-2.6.16.46-0.12.orig/include/linux/signal.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/signal.h	2007-08-28 17:35:33.000000000 +0400
@@ -3,6 +3,7 @@
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/slab.h>
 #include <asm/signal.h>
 #include <asm/siginfo.h>
 
@@ -44,6 +45,9 @@ struct sigqueue {
 	int flags;
 	siginfo_t info;
 	struct user_struct *user;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *sig_ub;
+#endif
 };
 
 /* flags values. */
@@ -266,6 +270,8 @@ extern int sigprocmask(int, sigset_t *, 
 struct pt_regs;
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
 
+extern kmem_cache_t *sigqueue_cachep;
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SIGNAL_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/skbuff.h linux-2.6.16.46-0.12-027test011/include/linux/skbuff.h
--- linux-2.6.16.46-0.12.orig/include/linux/skbuff.h	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/skbuff.h	2007-08-28 17:35:32.000000000 +0400
@@ -223,6 +223,8 @@ enum {
  *	@tc_verd: traffic control verdict
  */
 
+#include <ub/ub_sk.h>
+
 struct sk_buff {
 	/* These two members must be first. */
 	struct sk_buff		*next;
@@ -278,7 +280,9 @@ struct sk_buff {
 	__u8			pkt_type:3,
 				fclone:2,
 #ifndef CONFIG_XEN
-				ipvs_property:1;
+				ipvs_property:1,
+				accounted:1,
+				redirected:1;
 #else
 				ipvs_property:1,
 				proto_data_valid:1,
@@ -297,6 +301,9 @@ struct sk_buff {
 	struct nf_bridge_info	*nf_bridge;
 #endif
 #endif /* CONFIG_NETFILTER */
+#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
+	__u16			brmark;
+#endif
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
 #ifdef CONFIG_NET_CLS_ACT
@@ -315,6 +322,8 @@ struct sk_buff {
 				*data,
 				*tail,
 				*end;
+	struct skb_beancounter	skb_bc;
+	struct ve_struct	*owner_env;
 };
 
 #ifdef __KERNEL__
@@ -322,6 +331,7 @@ struct sk_buff {
  *	Handling routines are only of interest to the kernel
  */
 #include <linux/slab.h>
+#include <ub/ub_net.h>
 
 #include <asm/system.h>
 
@@ -1029,6 +1039,8 @@ static inline int pskb_trim(struct sk_bu
  */
 static inline void skb_orphan(struct sk_buff *skb)
 {
+	ub_skb_uncharge(skb);
+
 	if (skb->destructor)
 		skb->destructor(skb);
 	skb->destructor = NULL;
@@ -1420,6 +1432,24 @@ static inline void nf_reset(struct sk_bu
 static inline void nf_reset(struct sk_buff *skb) {}
 #endif /* CONFIG_NETFILTER */
 
+#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
+static inline void skb_copy_brmark(struct sk_buff *to, const struct sk_buff *from)
+{
+	to->brmark = from->brmark;
+}
+
+static inline void skb_init_brmark(struct sk_buff *skb)
+{
+	skb->brmark = 0;
+}
+#else
+static inline void skb_copy_brmark(struct sk_buff *to, const struct sk_buff *from)
+{ }
+
+static inline void skb_init_brmark(struct sk_buff *skb)
+{ }
+#endif
+
 static inline int skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
diff -upr linux-2.6.16.46-0.12.orig/include/linux/slab.h linux-2.6.16.46-0.12-027test011/include/linux/slab.h
--- linux-2.6.16.46-0.12.orig/include/linux/slab.h	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/slab.h	2007-08-28 17:35:31.000000000 +0400
@@ -49,6 +49,26 @@ typedef struct kmem_cache kmem_cache_t;
 #define SLAB_DESTROY_BY_RCU	0x00080000UL	/* defer freeing pages to RCU */
 #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
 
+/*
+ * allocation rules:                            __GFP_UBC       0
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *  cache (SLAB_UBC)				charge		charge
+ *				      (usual caches: mm, vma, task_struct, ...)
+ *
+ *  cache (SLAB_UBC | SLAB_NO_CHARGE)		charge		---
+ *					     (ub_kmalloc)    (kmalloc)
+ *
+ *  cache (no UB flags)				BUG()		---
+ *							(nonub caches, mempools)
+ *
+ *  pages					charge		---
+ *					   (ub_vmalloc,	      (vmalloc,
+ *				        poll, fdsets, ...)  non-ub allocs)
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#define SLAB_UBC		0x20000000UL	/* alloc space for ubs ... */
+#define SLAB_NO_CHARGE		0x40000000UL	/* ... but don't charge */
+
 /* flags passed to a constructor func */
 #define	SLAB_CTOR_CONSTRUCTOR	0x001UL		/* if not set, then deconstructor */
 #define SLAB_CTOR_ATOMIC	0x002UL		/* tell constructor it can't sleep */
@@ -77,6 +97,7 @@ struct cache_sizes {
 	kmem_cache_t	*cs_dmacachep;
 };
 extern struct cache_sizes malloc_sizes[];
+extern int malloc_cache_num;
 
 #ifndef CONFIG_DEBUG_SLAB
 extern void *__kmalloc(size_t, gfp_t);
@@ -88,7 +109,7 @@ extern void *__kmalloc_track_caller(size
 
 static inline void *kmalloc(size_t size, gfp_t flags)
 {
-	if (__builtin_constant_p(size)) {
+	if (__builtin_constant_p(size) && __builtin_constant_p(flags)) {
 		int i = 0;
 #define CACHE(x) \
 		if (size <= x) \
@@ -102,6 +123,8 @@ static inline void *kmalloc(size_t size,
 			__you_cannot_kmalloc_that_much();
 		}
 found:
+		if (flags & __GFP_UBC)
+			i += malloc_cache_num;
 		return kmem_cache_alloc((flags & GFP_DMA) ?
 			malloc_sizes[i].cs_dmacachep :
 			malloc_sizes[i].cs_cachep, flags);
@@ -109,6 +132,8 @@ found:
 	return __kmalloc(size, flags);
 }
 
+#define ub_kmalloc(size, flags) kmalloc(size, ((flags) | __GFP_UBC))
+
 extern void *kzalloc(size_t, gfp_t);
 
 /**
@@ -190,6 +215,7 @@ extern kmem_cache_t	*bio_cachep;
 
 extern atomic_t slab_reclaim_pages;
 
+void show_slab_info(void);
 #endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_SLAB_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/smp.h linux-2.6.16.46-0.12-027test011/include/linux/smp.h
--- linux-2.6.16.46-0.12.orig/include/linux/smp.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/smp.h	2007-08-28 17:35:29.000000000 +0400
@@ -10,6 +10,9 @@
 
 extern void cpu_idle(void);
 
+struct pt_regs;
+typedef void (*smp_nmi_function)(struct pt_regs *regs, void *info);
+
 #ifdef CONFIG_SMP
 
 #include <linux/preempt.h>
@@ -49,6 +52,8 @@ extern int __cpu_up(unsigned int cpunum)
  */
 extern void smp_cpus_done(unsigned int max_cpus);
 
+extern int smp_nmi_call_function(smp_nmi_function func, void *info, int wait);
+
 /*
  * Call a function on all other processors
  */
@@ -99,6 +104,12 @@ static inline void smp_send_reschedule(i
 #define num_booting_cpus()			1
 #define smp_prepare_boot_cpu()			do {} while (0)
 
+static inline int smp_nmi_call_function(smp_nmi_function func,
+					 void *info, int wait)
+{
+	return 0;
+}
+
 #endif /* !SMP */
 
 /*
diff -upr linux-2.6.16.46-0.12.orig/include/linux/socket.h linux-2.6.16.46-0.12-027test011/include/linux/socket.h
--- linux-2.6.16.46-0.12.orig/include/linux/socket.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/socket.h	2007-08-28 17:35:32.000000000 +0400
@@ -287,6 +287,16 @@ struct ucred {
 #define IPX_TYPE	1
 
 #ifdef __KERNEL__
+
+#define MAX_SOCK_ADDR	128		/* 108 for Unix domain -
+					   16 for IP, 16 for IPX,
+					   24 for IPv6,
+					   about 80 for AX.25
+					   must be at least one bigger than
+					   the AF_UNIX size (see net/unix/af_unix.c
+					   :unix_mkname()).
+					 */
+
 extern int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 extern int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, 
 				int offset, int len);
@@ -300,6 +310,8 @@ extern int memcpy_toiovec(struct iovec *
 extern int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen);
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+extern int vz_security_family_check(int family);
+extern int vz_security_protocol_check(int protocol);
 
 #endif
 #endif /* not kernel and not glibc */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/sunrpc/clnt.h linux-2.6.16.46-0.12-027test011/include/linux/sunrpc/clnt.h
--- linux-2.6.16.46-0.12.orig/include/linux/sunrpc/clnt.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/sunrpc/clnt.h	2007-08-28 17:35:33.000000000 +0400
@@ -51,7 +51,8 @@ struct rpc_clnt {
 				cl_intr     : 1,/* interruptible */
 				cl_autobind : 1,/* use getport() */
 				cl_oneshot  : 1,/* dispose after use */
-				cl_dead     : 1;/* abandoned */
+				cl_dead     : 1,/* abandoned */
+				cl_broken   : 1;/* no response for too long */
 
 	struct rpc_rtt *	cl_rtt;		/* RTO estimator data */
 	struct rpc_portmap *	cl_pmap;	/* port mapping */
@@ -64,6 +65,8 @@ struct rpc_clnt {
 	struct rpc_rtt		cl_rtt_default;
 	struct rpc_portmap	cl_pmap_default;
 	char			cl_inline_name[32];
+
+	unsigned long		cl_pr_time;
 };
 #define cl_timeout		cl_xprt->timeout
 #define cl_prog			cl_pmap->pm_prog
diff -upr linux-2.6.16.46-0.12.orig/include/linux/sunrpc/debug.h linux-2.6.16.46-0.12-027test011/include/linux/sunrpc/debug.h
--- linux-2.6.16.46-0.12.orig/include/linux/sunrpc/debug.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/sunrpc/debug.h	2007-08-28 17:35:32.000000000 +0400
@@ -97,6 +97,7 @@ enum {
 	CTL_SLOTTABLE_TCP,
 	CTL_MIN_RESVPORT,
 	CTL_MAX_RESVPORT,
+	CTL_ABORT_TIMEOUT,
 };
 
 #endif /* _LINUX_SUNRPC_DEBUG_H_ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/sunrpc/xprt.h linux-2.6.16.46-0.12-027test011/include/linux/sunrpc/xprt.h
--- linux-2.6.16.46-0.12.orig/include/linux/sunrpc/xprt.h	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/sunrpc/xprt.h	2007-08-28 17:35:33.000000000 +0400
@@ -41,6 +41,14 @@ extern unsigned int xprt_max_resvport;
 #define RPC_DEF_MAX_RESVPORT	(1023U)
 
 /*
+ * Grand abort timeout (stop the client if it occurs)
+ */
+extern int xprt_abort_timeout;
+
+#define RPC_MIN_ABORT_TIMEOUT	300
+#define RPC_MAX_ABORT_TIMEOUT	INT_MAX
+
+/*
  * This describes a timeout strategy
  */
 struct rpc_timeout {
@@ -120,6 +128,7 @@ struct rpc_xprt {
 	struct rpc_xprt_ops *	ops;		/* transport methods */
 	struct socket *		sock;		/* BSD socket layer */
 	struct sock *		inet;		/* INET layer */
+	struct ve_struct *	owner_env;	/* VE owner of mount */
 
 	struct rpc_timeout	timeout;	/* timeout parms */
 	struct sockaddr_in	addr;		/* server address */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/swap.h linux-2.6.16.46-0.12-027test011/include/linux/swap.h
--- linux-2.6.16.46-0.12.orig/include/linux/swap.h	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/swap.h	2007-08-28 17:35:36.000000000 +0400
@@ -16,6 +16,7 @@ struct notifier_block;
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
 #define SWAP_FLAG_PRIO_SHIFT	0
+#define SWAP_FLAG_READONLY	0x40000000      /* set if swap is read-only */
 
 static inline int current_is_kswapd(void)
 {
@@ -82,6 +83,7 @@ struct address_space;
 struct sysinfo;
 struct writeback_control;
 struct zone;
+struct user_beancounter;
 
 /*
  * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of
@@ -111,6 +113,7 @@ enum {
 	SWP_ACTIVE	= (SWP_USED | SWP_WRITEOK),
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
+	SWP_READONLY	= (1 << 2),
 };
 
 #define SWAP_CLUSTER_MAX 32
@@ -121,6 +124,7 @@ enum {
 /*
  * The in-memory structure used to track swap areas.
  */
+struct user_beancounter;
 struct swap_info_struct {
 	unsigned int flags;
 	int prio;			/* swap priority */
@@ -138,6 +142,9 @@ struct swap_info_struct {
 	unsigned int max;
 	unsigned int inuse_pages;
 	int next;			/* next entry on swap list */
+#ifdef CONFIG_USER_SWAP_ACCOUNTING
+	struct user_beancounter **swap_ubs;
+#endif
 };
 
 struct swap_list_t {
@@ -145,6 +152,9 @@ struct swap_list_t {
 	int next;	/* swapfile to be used next */
 };
 
+extern struct swap_list_t swap_list;
+extern struct swap_info_struct swap_info[MAX_SWAPFILES];
+
 /* Swap 50% full? Release swapcache more aggressively.. */
 #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
 
@@ -152,6 +162,8 @@ struct swap_list_t {
 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
+extern struct mm_struct *oom_kill_process(struct task_struct *p, const char *message);
+extern struct task_struct *oom_select_bad_process(struct user_beancounter *ub);
 
 /* linux/mm/memory.c */
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
@@ -232,6 +244,9 @@ extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *, gfp_t);
 extern void __delete_from_swap_cache(struct page *);
 extern void __delete_from_swap_cache_nocheck(struct page *);
+extern int add_to_swap_cache(struct page *page, swp_entry_t entry);
+extern int __add_to_swap_cache(struct page *page,
+			       swp_entry_t entry, gfp_t gfp_mask);
 extern void delete_from_swap_cache(struct page *);
 extern int move_to_swap_cache(struct page *, swp_entry_t);
 extern int move_from_swap_cache(struct page *, unsigned long,
@@ -246,7 +261,7 @@ extern long total_swap_pages;
 extern unsigned int nr_swapfiles;
 extern struct swap_info_struct swap_info[];
 extern void si_swapinfo(struct sysinfo *);
-extern swp_entry_t get_swap_page(void);
+extern swp_entry_t get_swap_page(struct user_beancounter *);
 extern swp_entry_t get_swap_page_of_type(int type);
 extern int swap_duplicate(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
@@ -256,10 +271,13 @@ extern sector_t map_swap_page(struct swa
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int can_share_swap_page(struct page *);
 extern int remove_exclusive_swap_page(struct page *);
+extern int try_to_remove_exclusive_swap_page(struct page *);
 struct backing_dev_info;
 
 extern spinlock_t swap_lock;
-extern int remove_vma_swap(struct vm_area_struct *vma, struct page *page);
+struct page_beancounter;
+extern int remove_vma_swap(struct vm_area_struct *vma, struct page *page,
+		struct page_beancounter **pb);
 
 /* linux/mm/thrash.c */
 extern struct mm_struct * swap_token_mm;
@@ -317,7 +335,7 @@ static inline int remove_exclusive_swap_
 	return 0;
 }
 
-static inline swp_entry_t get_swap_page(void)
+static inline swp_entry_t get_swap_page(struct user_beancounter *ub)
 {
 	swp_entry_t entry;
 	entry.val = 0;
diff -upr linux-2.6.16.46-0.12.orig/include/linux/sysctl.h linux-2.6.16.46-0.12-027test011/include/linux/sysctl.h
--- linux-2.6.16.46-0.12.orig/include/linux/sysctl.h	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/sysctl.h	2007-08-28 17:35:34.000000000 +0400
@@ -154,6 +154,15 @@ enum
 	KERN_HZ=76,		/* unsigned long: internal kernel HZ */
 	KERN_RCU_MASK=77,       /* int: mask for slow rcu callback processing */
 	KERN_KDUMP_ON_INIT=78,  /* int: ia64 kdump with INIT */
+	KERN_VIRT_PIDS=202,	/* int: VE pids virtualization */
+	KERN_VIRT_OSRELEASE=205,/* virtualization of utsname.release */
+	KERN_VE_MEMINFO=208,    /* int: use privvmpages(0) or oomguarpages(1) */
+	KERN_VE_ALLOW_KTHREADS=207,
+	KERN_FAIRSCHED_MAX_LATENCY=211, /* int: Max start_tag delta */
+	KERN_VCPU_SCHED_TIMESLICE=212,
+	KERN_VCPU_TIMESLICE=213,
+	KERN_SCALE_VCPU_FREQUENCY=214,	/* Scale cpu frequency inside VE */
+	KERN_VCPU_HOT_TIMESLICE=215,
 };
 
 
@@ -405,10 +414,14 @@ enum
 	NET_TCP_ABC=111,
 	NET_IPV4_IPFRAG_MAX_DIST=112,
 	NET_TCP_DMA_COPYBREAK=113,
+	NET_TCP_MAX_TW_BUCKETS_UB=151,
+	NET_TCP_MAX_TW_KMEM_FRACTION=152,
+	NET_TCP_PORT_FORWARD_RANGE=190,
 };
 
 enum {
 	NET_IPV4_ROUTE_FLUSH=1,
+	NET_IPV4_ROUTE_SRC_CHECK=188,
 	NET_IPV4_ROUTE_MIN_DELAY=2,
 	NET_IPV4_ROUTE_MAX_DELAY=3,
 	NET_IPV4_ROUTE_GC_THRESH=4,
@@ -768,6 +781,12 @@ enum
 	FS_AIO_NR=18,	/* current system-wide number of aio requests */
 	FS_AIO_MAX_NR=19,	/* system-wide maximum number of aio requests */
 	FS_INOTIFY=20,	/* inotify submenu */
+ 	FS_AT_VSYSCALL=21,	/* int: to announce vsyscall data */
+};
+
+/* /proc/sys/debug */
+enum {
+	DBG_DECODE_CALLTRACES = 1,	/* int: decode call traces on oops */
 };
 
 /* /proc/sys/fs/quota/ */
@@ -878,6 +897,8 @@ enum
 #ifdef __KERNEL__
 #include <linux/list.h>
 
+extern int ve_allow_kthreads;
+
 extern void sysctl_init(void);
 
 typedef struct ctl_table ctl_table;
@@ -908,6 +929,8 @@ extern int proc_doulongvec_minmax(ctl_ta
 				  void __user *, size_t *, loff_t *);
 extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int,
 				      struct file *, void __user *, size_t *, loff_t *);
+extern int proc_doutsstring(ctl_table *table, int write, struct file *,
+			    void __user *, size_t *, loff_t *);
 
 extern int do_sysctl (int __user *name, int nlen,
 		      void __user *oldval, size_t __user *oldlenp,
@@ -922,6 +945,7 @@ extern ctl_handler sysctl_string;
 extern ctl_handler sysctl_intvec;
 extern ctl_handler sysctl_jiffies;
 extern ctl_handler sysctl_ms_jiffies;
+extern ctl_handler sysctl_strategy_bset;
 
 
 /*
@@ -962,6 +986,8 @@ extern ctl_handler sysctl_ms_jiffies;
  */
 
 /* A sysctl table is an array of struct ctl_table: */
+struct ve_struct;
+
 struct ctl_table 
 {
 	int ctl_name;			/* Binary ID */
@@ -975,6 +1001,7 @@ struct ctl_table 
 	struct proc_dir_entry *de;	/* /proc control block */
 	void *extra1;
 	void *extra2;
+	struct ve_struct *owner_env;
 };
 
 /* struct ctl_table_header is used to maintain dynamic lists of
@@ -1001,6 +1028,9 @@ struct ctl_table_header * register_sysct
 						struct ctl_path *path);
 void unregister_sysctl_table(struct ctl_table_header * table);
 
+ctl_table *clone_sysctl_template(ctl_table *tmpl);
+void free_sysctl_clone(ctl_table *clone);
+
 #else /* __KERNEL__ */
 
 #endif /* __KERNEL__ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/sysfs.h linux-2.6.16.46-0.12-027test011/include/linux/sysfs.h
--- linux-2.6.16.46-0.12.orig/include/linux/sysfs.h	2007-08-24 19:28:20.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/sysfs.h	2007-08-28 17:35:31.000000000 +0400
@@ -120,6 +120,8 @@ void sysfs_remove_group(struct kobject *
 
 void sysfs_printk_last_file(void);
 
+extern struct file_system_type sysfs_fs_type;
+
 #else /* CONFIG_SYSFS */
 
 static inline int sysfs_create_dir(struct kobject * k)
diff -upr linux-2.6.16.46-0.12.orig/include/linux/task_io_accounting_ops.h linux-2.6.16.46-0.12-027test011/include/linux/task_io_accounting_ops.h
--- linux-2.6.16.46-0.12.orig/include/linux/task_io_accounting_ops.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/task_io_accounting_ops.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,45 @@
+/*
+ * Task I/O accounting operations
+ *
+ * Thin inline wrappers that forward task-level I/O accounting events to
+ * the ub_io_account_*() helpers.
+ */
+#ifndef __TASK_IO_ACCOUNTING_OPS_INCLUDED
+#define __TASK_IO_ACCOUNTING_OPS_INCLUDED
+
+#include <ub/io_acct.h>
+
+/* Only pointers to these are used below, so forward declarations suffice. */
+struct page;
+struct task_struct;
+
+/* Forward @bytes of read I/O to ub_io_account_read(). */
+static inline void task_io_account_read(size_t bytes)
+{
+	ub_io_account_read(bytes);
+}
+
+/*
+ * When @sync is non-zero, @bytes go to ub_io_account_write(); otherwise
+ * they go to ub_io_account_dirty() together with @page.
+ */
+static inline void task_io_account_write(struct page *page, size_t bytes,
+		int sync)
+{
+	if (sync)
+		ub_io_account_write(bytes);
+	else
+		ub_io_account_dirty(page, bytes);
+}
+
+/* Forward @bytes of a cancelled write to ub_io_account_write_cancelled(). */
+static inline void task_io_account_cancelled_write(size_t bytes)
+{
+	ub_io_account_write_cancelled(bytes);
+}
+
+/* Intentionally empty: this function performs no work for @tsk. */
+static inline void task_io_accounting_init(struct task_struct *tsk)
+{
+}
+#endif		/* __TASK_IO_ACCOUNTING_OPS_INCLUDED */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/time.h linux-2.6.16.46-0.12-027test011/include/linux/time.h
--- linux-2.6.16.46-0.12.orig/include/linux/time.h	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/time.h	2007-08-28 17:35:30.000000000 +0400
@@ -110,7 +110,7 @@ extern void do_gettimeofday(struct timev
 extern int do_settimeofday(struct timespec *tv);
 extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
 #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
-extern long do_utimes(int dfd, char __user *filename, struct timeval *times);
+extern long do_utimes(int dfd, char __user *filename, struct timeval *times, int flags);
 struct itimerval;
 extern int do_setitimer(int which, struct itimerval *value,
 			struct itimerval *ovalue);
diff -upr linux-2.6.16.46-0.12.orig/include/linux/tty.h linux-2.6.16.46-0.12-027test011/include/linux/tty.h
--- linux-2.6.16.46-0.12.orig/include/linux/tty.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/tty.h	2007-08-28 17:35:31.000000000 +0400
@@ -238,6 +238,7 @@ struct tty_struct {
 	spinlock_t read_lock;
 	/* If the tty has a pending do_SAK, queue it here - akpm */
 	struct work_struct SAK_work;
+	struct ve_struct *owner_env;
 };
 
 /* tty magic number */
@@ -266,6 +267,7 @@ struct tty_struct {
 #define TTY_PTY_LOCK 		16	/* pty private */
 #define TTY_NO_WRITE_SPLIT 	17	/* Preserve write boundaries to driver */
 #define TTY_HUPPED 		18	/* Post driver->hangup() */
+#define TTY_CHARGED		19	/* Charged as ub resource */
 
 #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/tty_driver.h linux-2.6.16.46-0.12-027test011/include/linux/tty_driver.h
--- linux-2.6.16.46-0.12.orig/include/linux/tty_driver.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/tty_driver.h	2007-08-28 17:35:31.000000000 +0400
@@ -214,14 +214,29 @@ struct tty_driver {
 			unsigned int set, unsigned int clear);
 
 	struct list_head tty_drivers;
+	struct ve_struct *owner_env;
 };
 
+#ifdef CONFIG_UNIX98_PTYS
+extern struct tty_driver *ptm_driver;	/* Unix98 pty masters; for /dev/ptmx */
+extern struct tty_driver *pts_driver;	/* Unix98 pty slaves;  for /dev/ptmx */
+#endif
+
+#ifdef CONFIG_LEGACY_PTYS
+extern struct tty_driver *pty_driver;
+extern struct tty_driver *pty_slave_driver;
+#endif
+
 extern struct list_head tty_drivers;
+extern rwlock_t tty_driver_guard;
 
 struct tty_driver *alloc_tty_driver(int lines);
 void put_tty_driver(struct tty_driver *driver);
 void tty_set_operations(struct tty_driver *driver, struct tty_operations *op);
 
+struct class *init_ve_tty_class(void);
+void fini_ve_tty_class(struct class *ve_tty_class);
+
 /* tty driver magic number */
 #define TTY_DRIVER_MAGIC		0x5402
 
diff -upr linux-2.6.16.46-0.12.orig/include/linux/utsname.h linux-2.6.16.46-0.12-027test011/include/linux/utsname.h
--- linux-2.6.16.46-0.12.orig/include/linux/utsname.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/utsname.h	2007-08-28 17:35:31.000000000 +0400
@@ -33,4 +33,6 @@ struct new_utsname {
 extern struct new_utsname system_utsname;
 
 extern struct rw_semaphore uts_sem;
+
+extern struct new_utsname virt_utsname;
 #endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/ve.h linux-2.6.16.46-0.12-027test011/include/linux/ve.h
--- linux-2.6.16.46-0.12.orig/include/linux/ve.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/ve.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,375 @@
+/*
+ *  include/linux/ve.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VE_H
+#define _LINUX_VE_H
+
+#include <linux/config.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <linux/utsname.h>
+#include <linux/sysctl.h>
+#include <linux/net.h>
+#include <linux/vzstat.h>
+#include <linux/kobject.h>
+
+#ifdef VZMON_DEBUG
+#  define VZTRACE(fmt,args...) \
+	printk(KERN_DEBUG fmt, ##args)
+#else
+#  define VZTRACE(fmt,args...)
+#endif /* VZMON_DEBUG */
+
+struct tty_driver;
+struct devpts_config;
+struct task_struct;
+struct new_utsname;
+struct file_system_type;
+struct icmp_mib;
+struct ip_mib;
+struct tcp_mib;
+struct udp_mib;
+struct linux_mib;
+struct fib_info;
+struct fib_rule;
+struct veip_struct;
+struct ve_monitor;
+
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+struct fib_table;
+struct devcnfv4_struct;
+struct ve_nfs_context;
+#ifdef CONFIG_VE_IPTABLES
+struct xt_af;
+struct xt_table;
+struct xt_target;
+struct ip_conntrack;
+typedef unsigned int (*ip_nat_helper_func)(void);
+struct ve_ip_conntrack {
+	struct list_head 	*_ip_conntrack_hash;
+	struct list_head	_ip_conntrack_expect_list;
+	struct list_head	_ip_conntrack_unconfirmed;
+	struct ip_conntrack_protocol ** _ip_ct_protos;
+	struct list_head	_ip_conntrack_helpers;
+	int 			_ip_conntrack_max;
+	int			_ip_conntrack_vmalloc;
+	atomic_t		_ip_conntrack_count;
+	void (*_ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
+#ifdef CONFIG_SYSCTL
+	unsigned long		_ip_ct_tcp_timeouts[10];
+	unsigned long		_ip_ct_udp_timeout;
+	unsigned long		_ip_ct_udp_timeout_stream;
+	unsigned long		_ip_ct_icmp_timeout;
+	unsigned long		_ip_ct_generic_timeout;
+	unsigned int		_ip_ct_log_invalid;
+	unsigned long		_ip_ct_tcp_timeout_max_retrans;
+	int			_ip_ct_tcp_loose;
+	int			_ip_ct_tcp_be_liberal;
+	int			_ip_ct_tcp_max_retrans;
+	struct ctl_table_header *_ip_ct_sysctl_header;
+	ctl_table		*_ip_ct_net_table;
+	ctl_table		*_ip_ct_ipv4_table;
+	ctl_table		*_ip_ct_netfilter_table;
+	ctl_table		*_ip_ct_sysctl_table;
+#endif /*CONFIG_SYSCTL*/
+
+	struct ip_nat_protocol	**_ip_nat_protos;
+	ip_nat_helper_func	_ip_nat_ftp_hook;
+	ip_nat_helper_func	_ip_nat_irc_hook;
+	struct list_head	*_ip_nat_bysource;
+	struct xt_table		*_ip_nat_table;
+
+	/* resource accounting */
+	struct user_beancounter *ub;
+};
+#endif
+#endif
+
+#define UIDHASH_BITS_VE		6
+#define UIDHASH_SZ_VE		(1 << UIDHASH_BITS_VE)
+
+struct ve_cpu_stats {
+	cycles_t	idle_time;
+	cycles_t	iowait_time;
+	cycles_t	strt_idle_time;
+	cycles_t	used_time;
+	seqcount_t	stat_lock;
+	int		nr_running;
+	int		nr_unint;
+	cputime64_t	user;
+	cputime64_t	nice;
+	cputime64_t	system;
+} ____cacheline_aligned;
+
+struct ve_struct {
+	struct list_head	ve_list;
+
+	envid_t			veid;
+	struct task_struct	*init_entry;
+	struct list_head	vetask_lh;
+/*
+ * In 2.6.16 the task list is strictly protected by tasklist_lock,
+ * even for reading, so there is no need for a separate aux list.
+ */
+#define vetask_auxlist		vetask_lh
+	/* capability bounding set */
+	kernel_cap_t		ve_cap_bset;
+	atomic_t		pcounter;
+	/* ref counter to ve from ipc */
+	atomic_t		counter;
+	unsigned int		class_id;
+	struct rw_semaphore	op_sem;
+	int			is_running;
+	int			is_locked;
+	atomic_t		suspend;
+	int			virt_pids;
+	/* see vzcalluser.h for VE_FEATURE_XXX definitions */
+	__u64			features;
+
+/* VE's root */
+	struct vfsmount 	*fs_rootmnt;
+	struct dentry 		*fs_root;
+
+/* sysctl */
+	struct new_utsname	*utsname;
+	struct list_head	sysctl_lh;
+	struct ctl_table_header	*kern_header;
+	struct ctl_table	*kern_table;
+	struct ctl_table_header	*quota_header;
+	struct ctl_table	*quota_table;
+	struct file_system_type *proc_fstype;
+	struct vfsmount		*proc_mnt;
+	struct proc_dir_entry	*proc_root;
+	struct proc_dir_entry	*proc_sys_root;
+	struct proc_dir_entry	*_proc_net;
+	struct proc_dir_entry	*_proc_net_stat;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct proc_dir_entry	*_proc_net_devsnmp6;
+#endif
+
+/* SYSV IPC */
+	struct ipc_ids		*_shm_ids;
+	struct ipc_ids		*_msg_ids;
+	struct ipc_ids		*_sem_ids;
+	int			_used_sems;
+	int			_shm_tot;
+	size_t			_shm_ctlmax;
+	size_t			_shm_ctlall;
+	int			_shm_ctlmni;
+	int			_msg_ctlmax;
+	int			_msg_ctlmni;
+	int			_msg_ctlmnb;
+	int			_sem_ctls[4];
+
+/* BSD pty's */
+#ifdef CONFIG_LEGACY_PTYS
+	struct tty_driver       *pty_driver;
+	struct tty_driver       *pty_slave_driver;
+#endif
+#ifdef CONFIG_UNIX98_PTYS
+	struct tty_driver	*ptm_driver;
+	struct tty_driver	*pts_driver;
+	struct idr		*allocated_ptys;
+	struct file_system_type *devpts_fstype;
+	struct vfsmount		*devpts_mnt;
+	struct dentry		*devpts_root;
+	struct devpts_config	*devpts_config;
+#endif
+
+	struct ve_nfs_context	*nfs_context;
+
+	struct file_system_type *shmem_fstype;
+	struct vfsmount		*shmem_mnt;
+#ifdef CONFIG_SYSFS
+	struct file_system_type *sysfs_fstype;
+	struct vfsmount		*sysfs_mnt;
+	struct super_block	*sysfs_sb;
+	struct sysfs_dirent	*sysfs_root;
+#endif
+	struct subsystem	*class_subsys;
+	struct subsystem	*class_obj_subsys;
+	struct class		*tty_class;
+
+/* User uids hash */
+	struct list_head	uidhash_table[UIDHASH_SZ_VE];
+
+#ifdef CONFIG_NET
+	struct class		*net_class;
+	struct hlist_head	_net_dev_head;
+	struct hlist_head	_net_dev_index_head;
+	struct net_device	*_net_dev_base, **_net_dev_tail;
+	int			ifindex;
+	struct net_device	*_loopback_dev;
+	struct net_device_stats	*_loopback_stats;
+#ifdef CONFIG_INET
+	struct ipv4_devconf	*_ipv4_devconf;
+	struct ipv4_devconf	*_ipv4_devconf_dflt;
+	struct ctl_table_header	*forward_header;
+	struct ctl_table	*forward_table;
+ 	unsigned long		rt_flush_required;
+	struct neigh_table	*ve_arp_tbl;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct ipv6_devconf	*_ipv6_devconf;
+	struct ipv6_devconf	*_ipv6_devconf_dflt;
+	struct neigh_table	*ve_nd_tbl;
+#endif
+#endif
+#endif
+#if defined(CONFIG_VE_NETDEV) || defined (CONFIG_VE_NETDEV_MODULE)
+	struct veip_struct	*veip;
+	struct net_device	*_venet_dev;
+#endif
+
+/* per VE CPU stats*/
+	struct timespec		start_timespec;
+	u64			start_jiffies;	/* Deprecated */
+	cycles_t 		start_cycles;
+	unsigned long		avenrun[3];	/* loadavg data */
+
+	cycles_t 		cpu_used_ve;
+	struct kstat_lat_pcpu_struct	sched_lat_ve;
+
+#ifdef CONFIG_INET
+	struct hlist_head	*_fib_info_hash;
+	struct hlist_head	*_fib_info_laddrhash;
+	int			_fib_hash_size;
+	int			_fib_info_cnt;
+
+	struct fib_rule		*_local_rule;
+	struct fib_rule		*_fib_rules;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	/* XXX: why a magic constant? */
+	struct fib_table 	*_fib_tables[256]; /* RT_TABLE_MAX - for now */
+#else
+	struct fib_table	*_main_table;
+	struct fib_table	*_local_table;
+#endif
+	struct icmp_mib		*_icmp_statistics[2];
+	struct ipstats_mib	*_ip_statistics[2];
+	struct tcp_mib		*_tcp_statistics[2];
+	struct udp_mib		*_udp_statistics[2];
+	struct linux_mib	*_net_statistics[2];
+	struct venet_stat       *stat;
+#ifdef CONFIG_VE_IPTABLES
+/* core/netfilter.c virtualization */
+	void			*_nf_hooks;
+	struct xt_table		*_ve_ipt_filter_pf; /* packet_filter struct */
+	struct xt_table		*_ve_ip6t_filter_pf;
+	struct xt_table		*_ipt_mangle_table;
+	struct xt_table		*_ip6t_mangle_table;
+	struct list_head	_xt_tables[NPROTO];
+
+	__u64			_iptables_modules;
+	struct ve_ip_conntrack	*_ip_conntrack;
+#endif /* CONFIG_VE_IPTABLES */
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct fib6_table	*_fib6_table;
+	struct ipstats_mib	*_ipv6_statistics[2];
+	struct icmpv6_mib	*_icmpv6_statistics[2];
+	struct udp_mib		*_udp_stats_in6[2];
+#endif
+#endif
+	wait_queue_head_t	*_log_wait;
+	unsigned long		*_log_start;
+	unsigned long		*_log_end;
+	unsigned long		*_logged_chars;
+	char			*log_buf;
+#define VE_DEFAULT_LOG_BUF_LEN	4096
+
+	struct ve_cpu_stats	*cpu_stats;
+	unsigned long		down_at;
+	struct list_head	cleanup_list;
+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
+	struct list_head	_fuse_conn_list;
+	struct super_block	*_fuse_control_sb;
+
+	struct file_system_type	*fuse_fs_type;
+	struct file_system_type	*fuse_ctl_fs_type;
+#endif
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+	struct proc_dir_entry	*_proc_vlan_dir;
+	struct proc_dir_entry	*_proc_vlan_conf;
+#endif
+	unsigned long		jiffies_fixup;
+	unsigned char		disable_net;
+ 	unsigned char		sparse_vpid;
+	struct ve_monitor	*monitor;
+	struct proc_dir_entry	*monitor_proc;
+	unsigned long		meminfo_val;
+};
+
+#define VE_CPU_STATS(ve, cpu)	(per_cpu_ptr((ve)->cpu_stats, cpu))
+
+extern int nr_ve;
+
+#ifdef CONFIG_VE
+
+void do_update_load_avg_ve(void);
+void do_env_free(struct ve_struct *ptr);
+
+#define ve_utsname (*get_exec_env()->utsname)
+/* Take a reference on a VE: bumps ptr->counter unless ptr is NULL, then returns ptr. */
+static inline struct ve_struct *get_ve(struct ve_struct *ptr)
+{
+	if (ptr)
+		atomic_inc(&ptr->counter);
+	return ptr;
+}
+
+static inline void put_ve(struct ve_struct *ptr)
+{
+	/* NULL is ignored; only the drop that takes counter to zero continues. */
+	if (ptr == NULL || !atomic_dec_and_test(&ptr->counter))
+		return;
+	/* On the final put, pcounter must be zero and the VE must not be running. */
+	if (atomic_read(&ptr->pcounter) > 0 || ptr->is_running)
+		BUG();
+	do_env_free(ptr);
+}
+
+static inline void pget_ve(struct ve_struct *ptr)
+{
+	atomic_inc(&ptr->pcounter);	/* no NULL check here, unlike get_ve() */
+}
+
+void ve_cleanup_schedule(struct ve_struct *);
+static inline void pput_ve(struct ve_struct *ptr)
+{
+	if (unlikely(atomic_dec_and_test(&ptr->pcounter)))	/* pcounter hit zero */
+		ve_cleanup_schedule(ptr);
+}
+
+extern spinlock_t ve_cleanup_lock;
+extern struct list_head ve_cleanup_list;
+extern struct task_struct *ve_cleanup_thread;
+
+extern unsigned long long ve_relative_clock(struct timespec * ts);
+
+#ifdef CONFIG_FAIRSCHED	/* mask is filled in by fairsched, keyed by the VE's veid */
+#define ve_cpu_online_map(ve, mask) fairsched_cpu_online_map((ve)->veid, mask)
+#else			/* no fairsched: every VE gets a copy of cpu_online_map */
+#define ve_cpu_online_map(ve, mask) do { *(mask) = cpu_online_map; } while (0)
+#endif
+#else	/* CONFIG_VE */
+#define ve_utsname	system_utsname
+#define get_ve(ve)	(NULL)
+#define put_ve(ve)	do { } while (0)
+#define pget_ve(ve)	do { } while (0)
+#define pput_ve(ve)	do { } while (0)
+#endif	/* CONFIG_VE */
+
+#endif /* _LINUX_VE_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/ve_nfs.h linux-2.6.16.46-0.12-027test011/include/linux/ve_nfs.h
--- linux-2.6.16.46-0.12.orig/include/linux/ve_nfs.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/ve_nfs.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,35 @@
+/*
+ * include/linux/ve_nfs.h
+ *
+ * VE context for NFS
+ *
+ * Copyright (C) 2007 SWsoft
+ */
+
+#ifndef __VE_NFS_H__
+#define __VE_NFS_H__
+
+#ifdef CONFIG_VE
+struct ve_nfs_context {
+	struct file_system_type *fstype;
+	unsigned int		_nlmsvc_users;
+	pid_t			_nlmsvc_pid;
+	int			_nlmsvc_grace_period;
+	unsigned long		_nlmsvc_timeout;
+};
+
+#define NFS_CTX_FIELD(arg)						\
+        (*(get_exec_env()->nfs_context == NULL ? &_##arg :		\
+		       &get_exec_env()->nfs_context->_##arg))
+#define nlmsvc_grace_period	NFS_CTX_FIELD(nlmsvc_grace_period)
+#define nlmsvc_timeout		NFS_CTX_FIELD(nlmsvc_timeout)
+#define nlmsvc_users		NFS_CTX_FIELD(nlmsvc_users)
+#define nlmsvc_pid		NFS_CTX_FIELD(nlmsvc_pid)
+#else	/* !CONFIG_VE: each name maps straight onto its own _nlmsvc_* object */
+#define nlmsvc_grace_period	_nlmsvc_grace_period
+#define nlmsvc_timeout		_nlmsvc_timeout
+#define nlmsvc_users		_nlmsvc_users
+#define nlmsvc_pid		_nlmsvc_pid
+#endif
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/ve_proto.h linux-2.6.16.46-0.12-027test011/include/linux/ve_proto.h
--- linux-2.6.16.46-0.12.orig/include/linux/ve_proto.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/ve_proto.h	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,91 @@
+/*
+ *  include/linux/ve_proto.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_H__
+#define __VE_H__
+
+#ifdef CONFIG_VE
+
+struct ve_struct;
+
+#ifdef CONFIG_INET
+void ip_fragment_cleanup(struct ve_struct *envid);
+void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
+struct fib_table * fib_hash_init(int id);
+#ifdef CONFIG_VE_NETDEV
+int venet_init(void);
+#endif
+#else
+static inline void ip_fragment_cleanup(struct ve_struct *ve) { ; }
+#endif
+
+extern struct list_head ve_list_head;
+#define for_each_ve(ve)	list_for_each_entry((ve), &ve_list_head, ve_list)
+extern rwlock_t ve_list_lock;
+extern struct ve_struct *get_ve_by_id(envid_t);
+extern struct ve_struct *__find_ve_by_id(envid_t);
+
+struct env_create_param3;
+extern int real_env_create(envid_t veid, unsigned flags, u32 class_id,
+			   struct env_create_param3 *data, int datalen);
+
+void ve_move_task(struct task_struct *tsk, struct ve_struct *new,
+		struct ve_struct *old);
+int set_device_perms_ve(envid_t veid, unsigned type, dev_t dev, unsigned mask);
+int get_device_perms_ve(int dev_type, dev_t dev, int access_mode);
+void clean_device_perms_ve(envid_t veid);
+extern struct file_operations proc_devperms_ops;
+
+enum {
+	VE_SS_CHAIN,
+
+	VE_MAX_CHAINS
+};
+
+typedef int ve_hook_init_fn(void *data);
+typedef void ve_hook_fini_fn(void *data);
+
+struct ve_hook
+{
+	ve_hook_init_fn *init;
+	ve_hook_fini_fn *fini;
+	struct module *owner;
+
+	/* Functions are called in ascending priority */
+	int priority;
+
+	/* Private part */
+	struct list_head list;
+};
+
+enum {
+	HOOK_PRIO_DEFAULT = 0,
+
+	HOOK_PRIO_FS = HOOK_PRIO_DEFAULT,
+
+	HOOK_PRIO_NET_PRE,
+	HOOK_PRIO_NET,
+	HOOK_PRIO_NET_POST,
+
+	HOOK_PRIO_AFTERALL = INT_MAX
+};
+
+extern int ve_hook_iterate_init(int chain, void *data);
+extern void ve_hook_iterate_fini(int chain, void *data);
+
+extern void ve_hook_register(int chain, struct ve_hook *vh);
+extern void ve_hook_unregister(struct ve_hook *vh);
+#else /* CONFIG_VE */
+#define ve_hook_register(ch, vh)	do { } while (0)
+#define ve_hook_unregister(ve)		do { } while (0)
+
+#define get_device_perms_ve(t, d, a)	(0)
+#endif /* CONFIG_VE */
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/ve_task.h linux-2.6.16.46-0.12-027test011/include/linux/ve_task.h
--- linux-2.6.16.46-0.12.orig/include/linux/ve_task.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/ve_task.h	2007-08-28 17:35:32.000000000 +0400
@@ -0,0 +1,73 @@
+/*
+ *  include/linux/ve_task.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_TASK_H__
+#define __VE_TASK_H__
+
+#include <linux/seqlock.h>
+#include <asm/timex.h>
+
+struct ve_task_info {
+/* virtualization */
+	struct ve_struct *owner_env;
+	struct ve_struct *exec_env;
+	struct ve_struct *saved_env;
+	struct list_head vetask_list;
+/*
+ * In 2.6.16 the task list is strictly protected by tasklist_lock,
+ * even for reading, so there is no need for a separate aux list.
+ */
+#define aux_list	vetask_list
+	struct dentry *glob_proc_dentry;
+/* statistics: scheduling latency */
+	cycles_t sleep_time;
+	cycles_t sched_time;
+	cycles_t sleep_stamp;
+	cycles_t wakeup_stamp;
+	seqcount_t wakeup_lock;
+};
+
+#define VE_TASK_INFO(task)	(&(task)->ve_task_info)
+#define VE_TASK_LIST_2_TASK(lh)	\
+	list_entry(lh, struct task_struct, ve_task_info.vetask_list)
+
+#ifdef CONFIG_VE
+extern struct ve_struct ve0;
+#define get_ve0()	(&ve0)
+/* Stash task t's exec_env in saved_env and switch it to ve0; restore undoes that. */
+#define ve_save_context(t)	do {				\
+		(t)->ve_task_info.saved_env =			\
+				(t)->ve_task_info.exec_env;	\
+		(t)->ve_task_info.exec_env = get_ve0();		\
+	} while (0)
+#define ve_restore_context(t)	do {				\
+		(t)->ve_task_info.exec_env =			\
+				(t)->ve_task_info.saved_env;	\
+	} while (0)
+
+#define get_exec_env()	(current->ve_task_info.exec_env)
+#define set_exec_env(ve)	({			\
+		struct ve_task_info *__vi;		\
+		struct ve_struct *__old_env;		\
+		/* swap current's exec_env; value is the previous one */ \
+		__vi = &current->ve_task_info;		\
+		__old_env = __vi->exec_env;		\
+		__vi->exec_env = (ve);			\
+		__old_env;				\
+	})
+#else
+#define get_ve0()		(NULL)
+#define get_exec_env()		(NULL)
+#define set_exec_env(new_env)	(NULL)
+#define ve_save_context(t)	do { } while (0)
+#define ve_restore_context(t)	do { } while (0)
+#endif
+
+#endif /* __VE_TASK_H__ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/veip.h linux-2.6.16.46-0.12-027test011/include/linux/veip.h
--- linux-2.6.16.46-0.12.orig/include/linux/veip.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/veip.h	2007-08-28 17:35:32.000000000 +0400
@@ -0,0 +1,15 @@
+#ifndef __VE_IP_H_
+#define __VE_IP_H_
+
+struct ve_addr_struct {
+	int family;
+	__u32 key[4];
+};
+
+struct sockaddr;
+
+extern void veaddr_print(char *, int, struct ve_addr_struct *);
+extern int sockaddr_to_veaddr(struct sockaddr __user *uaddr, int addrlen,
+		struct ve_addr_struct *veaddr);
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/venet.h linux-2.6.16.46-0.12-027test011/include/linux/venet.h
--- linux-2.6.16.46-0.12.orig/include/linux/venet.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/venet.h	2007-08-28 17:35:32.000000000 +0400
@@ -0,0 +1,73 @@
+/*
+ *  include/linux/venet.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VENET_H
+#define _VENET_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/vzcalluser.h>
+#include <linux/veip.h>
+
+#define VEIP_HASH_SZ 512
+
+struct ve_struct;
+struct venet_stat;
+
+struct ip_entry_struct
+{
+	struct ve_addr_struct	addr;
+	struct ve_struct	*active_env;
+	struct venet_stat	*stat;
+	struct veip_struct	*veip;
+	struct list_head 	ip_hash;
+	struct list_head 	ve_list;
+};
+
+struct veip_struct
+{
+	struct list_head	src_lh;
+	struct list_head	dst_lh;
+	struct list_head	ip_lh;
+	struct list_head	list;
+	envid_t			veid;
+};
+
+/* veip_hash_lock should be taken for write by caller */
+void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip);
+/* veip_hash_lock should be taken for write by caller */
+void ip_entry_unhash(struct ip_entry_struct *entry);
+/* veip_hash_lock should be taken for read by caller */
+struct ip_entry_struct *venet_entry_lookup(struct ve_addr_struct *);
+
+/* veip_hash_lock should be taken for read by caller */
+struct veip_struct *veip_find(envid_t veid);
+/* veip_hash_lock should be taken for write by caller */
+struct veip_struct *veip_findcreate(envid_t veid);
+/* veip_hash_lock should be taken for write by caller */
+void veip_put(struct veip_struct *veip);
+
+extern struct list_head veip_lh;
+
+int veip_start(struct ve_struct *ve);
+void veip_stop(struct ve_struct *ve);
+__exit void veip_cleanup(void);
+int veip_entry_add(struct ve_struct *ve, struct ve_addr_struct *addr);
+int veip_entry_del(envid_t veid, struct ve_addr_struct *addr);
+int venet_change_skb_owner(struct sk_buff *skb);
+
+extern struct list_head ip_entry_hash_table[];
+extern rwlock_t veip_hash_lock;
+
+#ifdef CONFIG_PROC_FS
+int veip_seq_show(struct seq_file *m, void *v);
+#endif
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/veprintk.h linux-2.6.16.46-0.12-027test011/include/linux/veprintk.h
--- linux-2.6.16.46-0.12.orig/include/linux/veprintk.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/veprintk.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,38 @@
+/*
+ *  include/linux/veprintk.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VE_PRINTK_H__
+#define __VE_PRINTK_H__
+
+#ifdef CONFIG_VE
+
+#define ve_log_wait		(*(get_exec_env()->_log_wait))
+#define ve_log_start		(*(get_exec_env()->_log_start))
+#define ve_log_end		(*(get_exec_env()->_log_end))
+#define ve_logged_chars		(*(get_exec_env()->_logged_chars))
+#define ve_log_buf		(get_exec_env()->log_buf)
+#define ve_log_buf_len		(ve_is_super(get_exec_env()) ? \
+				log_buf_len : VE_DEFAULT_LOG_BUF_LEN)
+#define VE_LOG_BUF_MASK		(ve_log_buf_len - 1)
+#define VE_LOG_BUF(idx)		(ve_log_buf[(idx) & VE_LOG_BUF_MASK])
+
+#else
+
+#define ve_log_wait		log_wait
+#define ve_log_start		log_start
+#define ve_log_end		log_end
+#define ve_logged_chars		logged_chars
+#define ve_log_buf		log_buf
+#define ve_log_buf_len		log_buf_len
+#define VE_LOG_BUF_MASK		LOG_BUF_MASK
+#define VE_LOG_BUF(idx)		LOG_BUF(idx)
+
+#endif /* CONFIG_VE */
+#endif /* __VE_PRINTK_H__ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/virtinfo.h linux-2.6.16.46-0.12-027test011/include/linux/virtinfo.h
--- linux-2.6.16.46-0.12.orig/include/linux/virtinfo.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/virtinfo.h	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,88 @@
+/*
+ *  include/linux/virtinfo.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __LINUX_VIRTINFO_H
+#define __LINUX_VIRTINFO_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/page-flags.h>
+#include <linux/notifier.h>
+
+struct vnotifier_block
+{
+	int (*notifier_call)(struct vnotifier_block *self,
+			unsigned long, void *, int);
+	struct vnotifier_block *next;
+	int priority;
+};
+
+extern struct semaphore virtinfo_sem;
+void __virtinfo_notifier_register(int type, struct vnotifier_block *nb);
+void virtinfo_notifier_register(int type, struct vnotifier_block *nb);
+void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb);
+int virtinfo_notifier_call(int type, unsigned long n, void *data);
+
+struct meminfo {
+	struct sysinfo si;
+	struct page_state ps;
+	unsigned long active, inactive;
+	unsigned long cache, swapcache;
+	unsigned long committed_space;
+	unsigned long allowed;
+	unsigned long vmalloc_total, vmalloc_used, vmalloc_largest;
+};
+
+#define VIRTINFO_MEMINFO	0
+#define VIRTINFO_ENOUGHMEM	1
+#define VIRTINFO_DOFORK         2
+#define VIRTINFO_DOEXIT         3
+#define VIRTINFO_DOEXECVE       4
+#define VIRTINFO_DOFORKRET      5
+#define VIRTINFO_DOFORKPOST     6
+#define VIRTINFO_EXIT           7
+#define VIRTINFO_EXITMMAP       8
+#define VIRTINFO_EXECMMAP       9
+#define VIRTINFO_OUTOFMEM       10
+#define VIRTINFO_PAGEIN         11
+#define VIRTINFO_SYSINFO        12
+#define VIRTINFO_NEWUBC         13
+#define VIRTINFO_VMSTAT		14
+
+enum virt_info_types {
+	VITYPE_GENERAL,
+	VITYPE_FAUDIT,
+	VITYPE_QUOTA,
+	VITYPE_SCP,
+
+	VIRT_TYPES
+};
+
+#ifdef CONFIG_VZ_GENCALLS
+
+static inline int virtinfo_gencall(unsigned long n, void *data)
+{
+	/* Verdict bits from the VITYPE_GENERAL notifier chain for event n. */
+	const int verdict = virtinfo_notifier_call(VITYPE_GENERAL, n, data);
+
+	/* NOTIFY_FAIL is tested first, so it wins when both bits are set. */
+	if (verdict & NOTIFY_FAIL)
+		return -ENOBUFS;
+
+	return (verdict & NOTIFY_OK) ? -ERESTARTNOINTR : 0;
+}
+
+#else
+
+#define virtinfo_gencall(n, data)	0
+
+#endif
+
+#endif /* __LINUX_VIRTINFO_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/virtinfoscp.h linux-2.6.16.46-0.12-027test011/include/linux/virtinfoscp.h
--- linux-2.6.16.46-0.12.orig/include/linux/virtinfoscp.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/virtinfoscp.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,21 @@
+#ifndef __VIRTINFO_SCP_H__
+#define __VIRTINFO_SCP_H__
+
+/*
+ * Dump and restore operations are non-symmetric.
+ * With respect to finish/fail hooks, 2 dump hooks are called from
+ * different proc operations, but restore hooks are called from a single one.
+ */
+#define VIRTINFO_SCP_COLLECT    0x10
+#define VIRTINFO_SCP_DUMP       0x11
+#define VIRTINFO_SCP_DMPFIN     0x12
+#define VIRTINFO_SCP_RSTCHECK   0x13
+#define VIRTINFO_SCP_RESTORE    0x14
+#define VIRTINFO_SCP_RSTFAIL    0x15
+
+#define VIRTINFO_SCP_RSTTSK     0x20
+#define VIRTINFO_SCP_RSTMM      0x21
+
+#define VIRTNOTIFY_CHANGE       0x100 
+
+#endif /* __VIRTINFO_SCP_H__ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vmalloc.h linux-2.6.16.46-0.12-027test011/include/linux/vmalloc.h
--- linux-2.6.16.46-0.12.orig/include/linux/vmalloc.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/linux/vmalloc.h	2007-08-28 17:35:31.000000000 +0400
@@ -18,6 +18,10 @@
 #define IOREMAP_MAX_ORDER	(7 + PAGE_SHIFT)	/* 128 pages */
 #endif
 
+/* align size to 2^n page boundary */
+#define POWER2_PAGE_ALIGN(size) \
+	((typeof(size))(1UL << (PAGE_SHIFT + get_order(size))))
+
 struct vm_struct {
 	void			*addr;
 	unsigned long		size;
@@ -32,10 +36,14 @@ struct vm_struct {
  *	Highlevel APIs for driver use
  */
 extern void *vmalloc(unsigned long size);
+extern void *ub_vmalloc(unsigned long size);
 extern void *vmalloc_node(unsigned long size, int node);
+extern void *ub_vmalloc_node(unsigned long size, int node);
 extern void *vmalloc_exec(unsigned long size);
 extern void *vmalloc_32(unsigned long size);
 extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
+extern void *vmalloc_best(unsigned long size);
+extern void *ub_vmalloc_best(unsigned long size);
 extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
 				pgprot_t prot);
 extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
@@ -52,6 +60,9 @@ extern void vunmap(void *addr);
 extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
 extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 					unsigned long start, unsigned long end);
+extern struct vm_struct * get_vm_area_best(unsigned long size,
+					   unsigned long flags);
+extern void vprintstat(void);
 extern struct vm_struct *get_vm_area_node(unsigned long size,
 					unsigned long flags, int node);
 extern struct vm_struct *remove_vm_area(void *addr);
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vsched.h linux-2.6.16.46-0.12-027test011/include/linux/vsched.h
--- linux-2.6.16.46-0.12.orig/include/linux/vsched.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vsched.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,27 @@
+/*
+ *  include/linux/vsched.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VSCHED_H__
+#define __VSCHED_H__
+
+#include <linux/config.h>
+#include <linux/cache.h>
+#include <linux/fairsched.h>
+#include <linux/sched.h>
+
+extern int vsched_create(int id, struct fairsched_node *node);
+extern int vsched_destroy(struct vcpu_scheduler *vsched);
+
+extern int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched);
+extern int vsched_set_vcpus(struct vcpu_scheduler *vsched, unsigned int vcpus);
+
+unsigned long ve_scale_khz(unsigned long khz);
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vzcalluser.h linux-2.6.16.46-0.12-027test011/include/linux/vzcalluser.h
--- linux-2.6.16.46-0.12.orig/include/linux/vzcalluser.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vzcalluser.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,240 @@
+/*
+ *  include/linux/vzcalluser.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VZCALLUSER_H
+#define _LINUX_VZCALLUSER_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define KERN_VZ_PRIV_RANGE 51
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+#ifndef __KERNEL__
+#define __user
+#endif
+
+/*
+ * VE management ioctls
+ */
+
+struct vzctl_old_env_create {
+	envid_t veid;
+	unsigned flags;
+#define VE_CREATE 	1	/* Create VE, VE_ENTER added automatically */
+#define VE_EXCLUSIVE	2	/* Fail if exists */
+#define VE_ENTER	4	/* Enter existing VE */
+#define VE_TEST		8	/* Test if VE exists */
+#define VE_LOCK		16	/* Do not allow entering created VE */
+#define VE_SKIPLOCK	32	/* Allow entering embryonic VE */
+	__u32 addr;
+};
+
+struct vzctl_mark_env_to_down {
+	envid_t veid;
+};
+
+struct vzctl_setdevperms {
+	envid_t veid;
+	unsigned type;
+#define VE_USE_MAJOR	010	/* Test MAJOR supplied in rule */
+#define VE_USE_MINOR	030	/* Test MINOR supplied in rule */
+#define VE_USE_MASK	030	/* Testing mask, VE_USE_MAJOR|VE_USE_MINOR */
+	unsigned dev;
+	unsigned mask;
+};
+
+struct vzctl_ve_netdev {
+	envid_t veid;
+	int op;
+#define VE_NETDEV_ADD  1
+#define VE_NETDEV_DEL  2
+	char __user *dev_name;
+};
+
+struct vzctl_ve_meminfo {
+	envid_t veid;
+	unsigned long val;
+};
+
+/* these masks represent modules */
+#define VE_IP_IPTABLES_MOD		(1U<<0)
+#define VE_IP_FILTER_MOD		(1U<<1)
+#define VE_IP_MANGLE_MOD		(1U<<2)
+#define VE_IP_CONNTRACK_MOD		(1U<<14)
+#define VE_IP_CONNTRACK_FTP_MOD		(1U<<15)
+#define VE_IP_CONNTRACK_IRC_MOD		(1U<<16)
+#define VE_IP_NAT_MOD			(1U<<20)
+#define VE_IP_NAT_FTP_MOD		(1U<<21)
+#define VE_IP_NAT_IRC_MOD		(1U<<22)
+#define VE_IP_IPTABLES6_MOD		(1U<<26)
+#define VE_IP_FILTER6_MOD		(1U<<27)
+#define VE_IP_MANGLE6_MOD		(1U<<28)
+#define VE_IP_IPTABLE_NAT_MOD		(1U<<29)
+
+/* these masks represent modules with their dependencies */
+#define VE_IP_IPTABLES		(VE_IP_IPTABLES_MOD)
+#define VE_IP_FILTER		(VE_IP_FILTER_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_MANGLE		(VE_IP_MANGLE_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_IPTABLES6		(VE_IP_IPTABLES6_MOD)
+#define VE_IP_FILTER6		(VE_IP_FILTER6_MOD | VE_IP_IPTABLES6)
+#define VE_IP_MANGLE6		(VE_IP_MANGLE6_MOD | VE_IP_IPTABLES6)
+#define VE_IP_CONNTRACK		(VE_IP_CONNTRACK_MOD		\
+					| VE_IP_IPTABLES)
+#define VE_IP_CONNTRACK_FTP	(VE_IP_CONNTRACK_FTP_MOD	\
+					| VE_IP_CONNTRACK)
+#define VE_IP_CONNTRACK_IRC	(VE_IP_CONNTRACK_IRC_MOD	\
+					| VE_IP_CONNTRACK)
+#define VE_IP_NAT		(VE_IP_NAT_MOD			\
+					| VE_IP_CONNTRACK)
+#define VE_IP_NAT_FTP		(VE_IP_NAT_FTP_MOD		\
+					| VE_IP_NAT | VE_IP_CONNTRACK_FTP)
+#define VE_IP_NAT_IRC		(VE_IP_NAT_IRC_MOD		\
+					| VE_IP_NAT | VE_IP_CONNTRACK_IRC)
+#define VE_IP_IPTABLE_NAT	(VE_IP_IPTABLE_NAT_MOD | VE_IP_CONNTRACK)
+
+/* safe iptables mask to be used by default */
+#define VE_IP_DEFAULT					\
+	(VE_IP_IPTABLES |				\
+	VE_IP_FILTER | VE_IP_MANGLE)
+
+#define VE_IPT_CMP(x,y)		(((x) & (y)) == (y))
+
+struct vzctl_env_create_cid {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+};
+
+struct vzctl_env_create {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+};
+
+struct env_create_param {
+	__u64 iptables_mask;
+};
+
+#define VZCTL_ENV_CREATE_DATA_MINLEN	sizeof(struct env_create_param)
+
+struct env_create_param2 {
+	__u64 iptables_mask;
+	__u64 feature_mask;
+	__u32 total_vcpus;	/* 0 - don't care, same as in host */
+};
+
+struct env_create_param3 {
+	__u64 iptables_mask;
+	__u64 feature_mask;
+	__u32 total_vcpus;
+	__u32 pad;
+	__u64 known_features;
+};
+
+#define VE_FEATURE_SYSFS	(1ULL << 0)
+#define VE_FEATURE_NFS		(1ULL << 1)
+#define VE_FEATURE_DEF_PERMS	(1ULL << 2)
+
+#define VE_FEATURES_OLD		(VE_FEATURE_SYSFS)
+#define VE_FEATURES_DEF		(VE_FEATURE_SYSFS | \
+				 VE_FEATURE_DEF_PERMS)
+
+typedef struct env_create_param3 env_create_param_t;
+#define VZCTL_ENV_CREATE_DATA_MAXLEN	sizeof(env_create_param_t)
+
+struct vzctl_env_create_data {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+	env_create_param_t __user *data;
+	int datalen;
+};
+
+struct vz_load_avg {
+	int val_int;
+	int val_frac;
+};
+
+struct vz_cpu_stat {
+	unsigned long user_jif;
+	unsigned long nice_jif;
+	unsigned long system_jif;
+	unsigned long uptime_jif;
+	__u64 idle_clk;
+	__u64 strv_clk;
+	__u64 uptime_clk;
+	struct vz_load_avg avenrun[3];	/* loadavg data */
+};
+
+struct vzctl_cpustatctl {
+	envid_t veid;
+	struct vz_cpu_stat __user *cpustat;
+};
+
+#define VZCTLTYPE '.'
+#define VZCTL_OLD_ENV_CREATE	_IOW(VZCTLTYPE, 0,			\
+					struct vzctl_old_env_create)
+#define VZCTL_MARK_ENV_TO_DOWN	_IOW(VZCTLTYPE, 1,			\
+					struct vzctl_mark_env_to_down)
+#define VZCTL_SETDEVPERMS	_IOW(VZCTLTYPE, 2,			\
+					struct vzctl_setdevperms)
+#define VZCTL_ENV_CREATE_CID	_IOW(VZCTLTYPE, 4,			\
+					struct vzctl_env_create_cid)
+#define VZCTL_ENV_CREATE	_IOW(VZCTLTYPE, 5,			\
+					struct vzctl_env_create)
+#define VZCTL_GET_CPU_STAT	_IOW(VZCTLTYPE, 6,			\
+					struct vzctl_cpustatctl)
+#define VZCTL_ENV_CREATE_DATA	_IOW(VZCTLTYPE, 10,			\
+					struct vzctl_env_create_data)
+#define VZCTL_VE_NETDEV		_IOW(VZCTLTYPE, 11,			\
+					struct vzctl_ve_netdev)
+#define VZCTL_VE_MEMINFO	_IOW(VZCTLTYPE, 13,                     \
+					struct vzctl_ve_meminfo)
+
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+struct compat_vzctl_ve_netdev {
+	envid_t veid;
+	int op;
+	compat_uptr_t dev_name;
+};
+
+struct compat_vzctl_ve_meminfo {
+	envid_t veid;
+	compat_ulong_t val;
+};
+
+struct compat_vzctl_env_create_data {
+	envid_t veid;
+	unsigned flags;
+	__u32 class_id;
+	compat_uptr_t data;
+	int datalen;
+};
+
+#define VZCTL_COMPAT_ENV_CREATE_DATA _IOW(VZCTLTYPE, 10,		\
+					struct compat_vzctl_env_create_data)
+#define VZCTL_COMPAT_VE_NETDEV	_IOW(VZCTLTYPE, 11,			\
+					struct compat_vzctl_ve_netdev)
+#define VZCTL_COMPAT_VE_MEMINFO	_IOW(VZCTLTYPE, 13,                     \
+					struct compat_vzctl_ve_meminfo)
+#endif
+#endif
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vzctl.h linux-2.6.16.46-0.12-027test011/include/linux/vzctl.h
--- linux-2.6.16.46-0.12.orig/include/linux/vzctl.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vzctl.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,30 @@
+/*
+ *  include/linux/vzctl.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_VZCTL_H
+#define _LINUX_VZCTL_H
+
+#include <linux/list.h>
+
+struct module;
+struct inode;
+struct file;
+struct vzioctlinfo {
+	unsigned type;
+	int (*ioctl)(struct file *, unsigned int, unsigned long);
+	int (*compat_ioctl)(struct file *, unsigned int, unsigned long);
+	struct module *owner;
+	struct list_head list;
+};
+
+extern void vzioctl_register(struct vzioctlinfo *inf);
+extern void vzioctl_unregister(struct vzioctlinfo *inf);
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vzctl_quota.h linux-2.6.16.46-0.12-027test011/include/linux/vzctl_quota.h
--- linux-2.6.16.46-0.12.orig/include/linux/vzctl_quota.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vzctl_quota.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,74 @@
+/*
+ *  include/linux/vzctl_quota.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __LINUX_VZCTL_QUOTA_H__
+#define __LINUX_VZCTL_QUOTA_H__
+
+#include <linux/compat.h>
+
+#ifndef __KERNEL__
+#define __user
+#endif
+
+/*
+ * Quota management ioctl
+ */
+
+struct vz_quota_stat;
+struct vzctl_quotactl {
+	int cmd;
+	unsigned int quota_id;
+	struct vz_quota_stat __user *qstat;
+	char __user *ve_root;
+};
+
+struct vzctl_quotaugidctl {
+	int cmd;		/* subcommand */
+	unsigned int quota_id;	/* quota id this request applies to */
+	unsigned int ugid_index;/* when reading statistics: index of the
+				    first uid/gid record to read */
+	unsigned int ugid_size;	/* size of ugid_buf array */
+	void *addr; 		/* user-level buffer */
+};
+
+#define VZDQCTLTYPE '+'
+#define VZCTL_QUOTA_DEPR_CTL	_IOWR(VZDQCTLTYPE, 1,			\
+					struct vzctl_quotactl)
+#define VZCTL_QUOTA_NEW_CTL	_IOWR(VZDQCTLTYPE, 2,			\
+					struct vzctl_quotactl)
+#define VZCTL_QUOTA_UGID_CTL	_IOWR(VZDQCTLTYPE, 3,			\
+					struct vzctl_quotaugidctl)
+
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+struct compat_vzctl_quotactl {
+	int cmd;
+	unsigned int quota_id;
+	compat_uptr_t qstat;
+	compat_uptr_t ve_root;
+};
+
+struct compat_vzctl_quotaugidctl {
+	int cmd;		/* subcommand */
+	unsigned int quota_id;	/* quota id this request applies to */
+	unsigned int ugid_index;/* when reading statistics: index of the
+				    first uid/gid record to read */
+	unsigned int ugid_size;	/* size of ugid_buf array */
+	compat_uptr_t addr; 	/* user-level buffer */
+};
+
+#define VZCTL_COMPAT_QUOTA_CTL	_IOWR(VZDQCTLTYPE, 2,			\
+					struct compat_vzctl_quotactl)
+#define VZCTL_COMPAT_QUOTA_UGID_CTL _IOWR(VZDQCTLTYPE, 3,		\
+					struct compat_vzctl_quotaugidctl)
+#endif
+#endif
+
+#endif /* __LINUX_VZCTL_QUOTA_H__ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vzctl_venet.h linux-2.6.16.46-0.12-027test011/include/linux/vzctl_venet.h
--- linux-2.6.16.46-0.12.orig/include/linux/vzctl_venet.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vzctl_venet.h	2007-08-28 17:35:32.000000000 +0400
@@ -0,0 +1,51 @@
+/*
+ *  include/linux/vzctl_venet.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZCTL_VENET_H
+#define _VZCTL_VENET_H
+
+#include <linux/types.h>
+#include <linux/compat.h>
+#include <linux/ioctl.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+struct vzctl_ve_ip_map {
+	envid_t veid;
+	int op;
+#define VE_IP_ADD	1
+#define VE_IP_DEL	2
+	struct sockaddr *addr;
+	int addrlen;
+};
+
+#define VENETCTLTYPE '('
+
+#define VENETCTL_VE_IP_MAP	_IOW(VENETCTLTYPE, 3,			\
+					struct vzctl_ve_ip_map)
+
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+struct compat_vzctl_ve_ip_map {
+	envid_t veid;
+	int op;
+	compat_uptr_t addr;
+	int addrlen;
+};
+
+#define VENETCTL_COMPAT_VE_IP_MAP _IOW(VENETCTLTYPE, 3,			\
+					struct compat_vzctl_ve_ip_map)
+#endif
+#endif
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vzctl_veth.h linux-2.6.16.46-0.12-027test011/include/linux/vzctl_veth.h
--- linux-2.6.16.46-0.12.orig/include/linux/vzctl_veth.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vzctl_veth.h	2007-08-28 17:35:32.000000000 +0400
@@ -0,0 +1,42 @@
+/*
+ *  include/linux/vzctl_veth.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZCTL_VETH_H
+#define _VZCTL_VETH_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#ifndef __ENVID_T_DEFINED__
+typedef unsigned envid_t;
+#define __ENVID_T_DEFINED__
+#endif
+
+struct vzctl_ve_hwaddr {
+	envid_t veid;
+	int op;
+#define VE_ETH_ADD			1
+#define VE_ETH_DEL			2
+#define VE_ETH_ALLOW_MAC_CHANGE		3
+#define VE_ETH_DENY_MAC_CHANGE		4
+	unsigned char	dev_addr[6];
+	int addrlen;
+	char		dev_name[16];
+	unsigned char	dev_addr_ve[6];
+	int addrlen_ve;
+	char		dev_name_ve[16];
+};
+
+#define VETHCTLTYPE '['
+
+#define VETHCTL_VE_HWADDR	_IOW(VETHCTLTYPE, 3,			\
+					struct vzctl_ve_hwaddr)
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vzdq_tree.h linux-2.6.16.46-0.12-027test011/include/linux/vzdq_tree.h
--- linux-2.6.16.46-0.12.orig/include/linux/vzdq_tree.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vzdq_tree.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,99 @@
+/*
+ *
+ * Copyright (C) 2005 SWsoft
+ * All rights reserved.
+ *
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo disk quota tree definition
+ */
+
+#ifndef _VZDQ_TREE_H
+#define _VZDQ_TREE_H
+
+#include <linux/list.h>
+#include <asm/string.h>
+
+typedef unsigned int quotaid_t;
+#define QUOTAID_BITS		32
+#define QUOTAID_BBITS		4
+#define QUOTAID_EBITS		8
+
+#if QUOTAID_EBITS % QUOTAID_BBITS
+#error Quota bit assumption failure
+#endif
+
+#define QUOTATREE_BSIZE		(1 << QUOTAID_BBITS)
+#define QUOTATREE_BMASK		(QUOTATREE_BSIZE - 1)
+#define QUOTATREE_DEPTH		((QUOTAID_BITS + QUOTAID_BBITS - 1) \
+							/ QUOTAID_BBITS)
+#define QUOTATREE_EDEPTH	((QUOTAID_BITS + QUOTAID_EBITS - 1) \
+							/ QUOTAID_EBITS)
+#define QUOTATREE_BSHIFT(lvl)	((QUOTATREE_DEPTH - (lvl) - 1) * QUOTAID_BBITS)
+
+/*
+ * Depth of keeping unused node (not inclusive).
+ * 0 means release all nodes including root,
+ * QUOTATREE_DEPTH means never release nodes.
+ * Current value: release all nodes strictly after QUOTATREE_EDEPTH
+ * (measured in external shift units).
+ */
+#define QUOTATREE_CDEPTH	(QUOTATREE_DEPTH \
+				- 2 * QUOTATREE_DEPTH / QUOTATREE_EDEPTH \
+				+ 1)
+
+/*
+ * Levels 0..(QUOTATREE_DEPTH-1) are tree nodes.
+ * On level i the maximal number of nodes is 2^(i*QUOTAID_BBITS),
+ * and each node contains 2^QUOTAID_BBITS pointers.
+ * Level 0 is a (single) tree root node.
+ *
+ * Nodes of level (QUOTATREE_DEPTH-1) contain pointers to caller's data.
+ * Nodes of lower levels contain pointers to nodes.
+ *
+ * Double pointer in array of i-level node, pointing to a (i+1)-level node
+ * (such as inside quotatree_find_state) are marked by level (i+1), not i.
+ * Level 0 double pointer is a pointer to root inside tree struct.
+ *
+ * The tree is permanent, i.e. all allocated index blocks are kept alive to
+ * preserve the block numbers in the quota file tree and to keep its changes
+ * local.
+ */
+struct quotatree_node {
+	struct list_head list;
+	quotaid_t num;
+	void *blocks[QUOTATREE_BSIZE];
+};
+
+struct quotatree_level {
+	struct list_head usedlh, freelh;
+	quotaid_t freenum;
+};
+
+struct quotatree_tree {
+	struct quotatree_level levels[QUOTATREE_DEPTH];
+	struct quotatree_node *root;
+	unsigned int leaf_num;
+};
+
+struct quotatree_find_state {
+	void **block;
+	int level;
+};
+
+/* number of leaves (objects) and leaf level of the tree */
+#define QTREE_LEAFNUM(tree)	((tree)->leaf_num)
+#define QTREE_LEAFLVL(tree)	(&(tree)->levels[QUOTATREE_DEPTH - 1])
+
+struct quotatree_tree *quotatree_alloc(void);
+void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st);
+int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
+		struct quotatree_find_state *st, void *data);
+void quotatree_remove(struct quotatree_tree *tree, quotaid_t id);
+void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *));
+void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id);
+void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index);
+
+#endif /* _VZDQ_TREE_H */
+
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vzevent.h linux-2.6.16.46-0.12-027test011/include/linux/vzevent.h
--- linux-2.6.16.46-0.12.orig/include/linux/vzevent.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vzevent.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,13 @@
+#ifndef __LINUX_VZ_EVENT_H__
+#define __LINUX_VZ_EVENT_H__
+
+#if defined(CONFIG_VZ_EVENT) || defined(CONFIG_VZ_EVENT_MODULE)
+extern int vzevent_send(int msg, const char *attrs_fmt, ...);
+#else	/* no vzevent support: the stub sends nothing and returns 0 */
+static inline int vzevent_send(int msg, const char *attrs_fmt, ...)
+{
+	return 0;
+}
+#endif	/* CONFIG_VZ_EVENT || CONFIG_VZ_EVENT_MODULE */
+
+#endif /* __LINUX_VZ_EVENT_H__ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vzquota.h linux-2.6.16.46-0.12-027test011/include/linux/vzquota.h
--- linux-2.6.16.46-0.12.orig/include/linux/vzquota.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vzquota.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,378 @@
+/*
+ *
+ * Copyright (C) 2001-2005 SWsoft
+ * All rights reserved.
+ *
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * This file contains Virtuozzo disk quota implementation
+ */
+
+#ifndef _VZDQUOTA_H
+#define _VZDQUOTA_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/* vzquotactl syscall commands */
+#define VZ_DQ_CREATE		5 /* create quota master block */
+#define VZ_DQ_DESTROY		6 /* destroy qmblk */
+#define VZ_DQ_ON		7 /* mark dentry with already created qmblk */
+#define VZ_DQ_OFF		8 /* remove mark, don't destroy qmblk */
+#define VZ_DQ_SETLIMIT		9 /* set new limits */
+#define VZ_DQ_GETSTAT		10 /* get usage statistic */
+/* set of syscalls to maintain UGID quotas */
+#define VZ_DQ_UGID_GETSTAT	1 /* get usage/limits for ugid(s) */
+#define VZ_DQ_UGID_ADDSTAT	2 /* set usage/limits statistic for ugid(s) */
+#define VZ_DQ_UGID_GETGRACE	3 /* get expire times */
+#define VZ_DQ_UGID_SETGRACE	4 /* set expire times */
+#define VZ_DQ_UGID_GETCONFIG	5 /* get ugid_max limit, cnt, flags of qmblk */
+#define VZ_DQ_UGID_SETCONFIG	6 /* set ugid_max limit, flags of qmblk */
+#define VZ_DQ_UGID_SETLIMIT	7 /* set ugid B/I limits */
+#define VZ_DQ_UGID_SETINFO	8 /* set ugid info */
+
+/* common structure for vz and ugid quota */
+struct dq_stat {
+	/* blocks limits */
+	__u64	bhardlimit;	/* absolute limit in bytes */
+	__u64	bsoftlimit;	/* preferred limit in bytes */
+	time_t	btime;		/* time limit for excessive disk use */
+	__u64	bcurrent;	/* current bytes count */
+	/* inodes limits */
+	__u32	ihardlimit;	/* absolute limit on allocated inodes */
+	__u32	isoftlimit;	/* preferred inode limit */
+	time_t	itime;		/* time limit for excessive inode use */
+	__u32	icurrent;	/* current # allocated inodes */
+};
+
+/* One second resolution for grace times */
+#define CURRENT_TIME_SECONDS	(get_seconds())
+
+/* Values for dq_info->flags */
+#define VZ_QUOTA_INODES 0x01       /* inodes limit warning printed */
+#define VZ_QUOTA_SPACE  0x02       /* space limit warning printed */
+
+struct dq_info {
+	time_t		bexpire;   /* expire timeout for excessive disk use */
+	time_t		iexpire;   /* expire timeout for excessive inode use */
+	unsigned	flags;	   /* see VZ_QUOTA_* defines above */
+};
+
+struct vz_quota_stat  {
+	struct dq_stat dq_stat;
+	struct dq_info dq_info;
+};
+
+/* UID/GID interface record - for user-kernel level exchange */
+struct vz_quota_iface {
+	unsigned int	qi_id;	   /* UID/GID this applies to */
+	unsigned int	qi_type;   /* USRQUOTA|GRPQUOTA */
+	struct dq_stat	qi_stat;   /* limits, options, usage stats */
+};
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+struct compat_dq_stat {
+	/* blocks limits */
+	__u64	bhardlimit;	/* absolute limit in bytes */
+	__u64	bsoftlimit;	/* preferred limit in bytes */
+	compat_time_t btime;	/* time limit for excessive disk use */
+	__u64	bcurrent;	/* current bytes count */
+	/* inodes limits */
+	__u32	ihardlimit;	/* absolute limit on allocated inodes */
+	__u32	isoftlimit;	/* preferred inode limit */
+	compat_time_t itime;	/* time limit for excessive inode use */
+	__u32	icurrent;	/* current # allocated inodes */
+};
+
+struct compat_dq_info {
+	compat_time_t	bexpire;   /* expire timeout for excessive disk use */
+	compat_time_t	iexpire;   /* expire timeout for excessive inode use */
+	unsigned	flags;	   /* see VZ_QUOTA_* defines above */
+};
+
+struct compat_vz_quota_stat  {
+	struct compat_dq_stat dq_stat;
+	struct compat_dq_info dq_info;
+};
+
+struct compat_vz_quota_iface {
+	unsigned int	qi_id;	   /* UID/GID this applies to */
+	unsigned int	qi_type;   /* USRQUOTA|GRPQUOTA */
+	struct compat_dq_stat qi_stat;   /* limits, options, usage stats */
+};
+
+static inline void compat_dqstat2dqstat(struct compat_dq_stat *odqs,
+				struct dq_stat *dqs)
+{
+	/* copy each compat field into the native record, in field order */
+	dqs->bhardlimit = odqs->bhardlimit;
+	dqs->bsoftlimit = odqs->bsoftlimit;
+	dqs->btime = odqs->btime;
+	dqs->bcurrent = odqs->bcurrent;
+	dqs->ihardlimit = odqs->ihardlimit;
+	dqs->isoftlimit = odqs->isoftlimit;
+	dqs->itime = odqs->itime;
+	dqs->icurrent = odqs->icurrent;
+}
+
+static inline void compat_dqinfo2dqinfo(struct compat_dq_info *odqi,
+				struct dq_info *dqi)
+{
+	dqi->flags = odqi->flags;
+	dqi->iexpire = odqi->iexpire;	/* compat_time_t -> time_t */
+	dqi->bexpire = odqi->bexpire;	/* compat_time_t -> time_t */
+}
+
+static inline void dqstat2compat_dqstat(struct dq_stat *dqs,
+				struct compat_dq_stat *odqs)
+{
+	/* native -> compat copy; time fields are cast to compat_time_t */
+	odqs->bhardlimit = dqs->bhardlimit;
+	odqs->bsoftlimit = dqs->bsoftlimit;
+	odqs->btime = (compat_time_t)dqs->btime;
+	odqs->bcurrent = dqs->bcurrent;
+	odqs->ihardlimit = dqs->ihardlimit;
+	odqs->isoftlimit = dqs->isoftlimit;
+	odqs->itime = (compat_time_t)dqs->itime;
+	odqs->icurrent = dqs->icurrent;
+}
+
+static inline void dqinfo2compat_dqinfo(struct dq_info *dqi,
+				struct compat_dq_info *odqi)
+{
+	odqi->flags = dqi->flags;
+	odqi->iexpire = (compat_time_t)dqi->iexpire;
+	odqi->bexpire = (compat_time_t)dqi->bexpire;
+}
+#endif
+
+/* values for flags and dq_flags */
+/* this flag is set if the userspace has been unable to provide usage
+ * information about all ugids
+ * if the flag is set, we don't allocate new UG quota blocks (their
+ * current usage is unknown) or free existing UG quota blocks (not to
+ * lose information that this block is ok) */
+#define VZDQUG_FIXED_SET	0x01
+/* permit to use ugid quota */
+#define VZDQUG_ON		0x02
+#define VZDQ_USRQUOTA		0x10
+#define VZDQ_GRPQUOTA		0x20
+#define VZDQ_NOACT		0x1000	/* not actual */
+#define VZDQ_NOQUOT		0x2000	/* not under quota tree */
+
+struct vz_quota_ugid_stat {
+	unsigned int	limit;	/* max amount of ugid records */
+	unsigned int	count;	/* amount of ugid records */
+	unsigned int	flags;
+};
+
+struct vz_quota_ugid_setlimit {
+	unsigned int	type;	/* quota type (USR/GRP) */
+	unsigned int	id;	/* ugid */
+	struct if_dqblk dqb;	/* limits info */
+};
+
+struct vz_quota_ugid_setinfo {
+	unsigned int	type;	/* quota type (USR/GRP) */
+	struct if_dqinfo dqi;	/* grace info */
+};
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <linux/time.h>
+#include <linux/vzquota_qlnk.h>
+#include <linux/vzdq_tree.h>
+
+/* Values for dq_info flags */
+#define VZ_QUOTA_INODES	0x01	   /* inodes limit warning printed */
+#define VZ_QUOTA_SPACE	0x02	   /* space limit warning printed */
+
+/* values for dq_state */
+#define VZDQ_STARTING		0 /* created, not turned on yet */
+#define VZDQ_WORKING		1 /* quota created, turned on */
+#define VZDQ_STOPING		2 /* created, turned on and off */
+
+/* master quota record - one per veid */
+struct vz_quota_master {
+	struct list_head	dq_hash;	/* next quota in hash list */
+	atomic_t		dq_count;	/* inode reference count */
+	unsigned int		dq_flags;	/* see VZDQUG_FIXED_SET */
+	unsigned int		dq_state;	/* see values above */
+	unsigned int		dq_id;		/* VEID this applies to */
+	struct dq_stat		dq_stat; 	/* limits, grace, usage stats */
+	struct dq_info		dq_info;	/* grace times and flags */
+	spinlock_t		dq_data_lock;	/* for dq_stat */
+
+	struct semaphore	dq_sem;		/* semaphore to protect
+						   ugid tree */
+
+	struct list_head	dq_ilink_list;	/* list of vz_quota_ilink */
+	struct quotatree_tree	*dq_uid_tree;	/* vz_quota_ugid tree for UIDs */
+	struct quotatree_tree	*dq_gid_tree;	/* vz_quota_ugid tree for GIDs */
+	unsigned int		dq_ugid_count;	/* amount of ugid records */
+	unsigned int		dq_ugid_max;	/* max amount of ugid records */
+	struct dq_info		dq_ugid_info[MAXQUOTAS]; /* ugid grace times */
+
+	struct dentry		*dq_root_dentry;/* dentry of fs tree */
+	struct vfsmount		*dq_root_mnt;	/* vfsmnt of this dentry */
+	struct super_block	*dq_sb;	      /* superblock of our quota root */
+};
+
+/* UID/GID quota record - one per pair (quota_master, uid or gid) */
+struct vz_quota_ugid {
+	unsigned int		qugid_id;     /* UID/GID this applies to */
+	struct dq_stat		qugid_stat;   /* limits, options, usage stats */
+	int			qugid_type;   /* USRQUOTA|GRPQUOTA */
+	atomic_t		qugid_count;  /* reference count */
+};
+
+#define VZ_QUOTA_UGBAD		((struct vz_quota_ugid *)0xfeafea11)
+
+struct vz_quota_datast {
+	struct vz_quota_ilink qlnk;
+};
+
+#define VIRTINFO_QUOTA_GETSTAT	0
+#define VIRTINFO_QUOTA_ON	1
+#define VIRTINFO_QUOTA_OFF	2
+#define VIRTINFO_QUOTA_DISABLE	3
+
+struct virt_info_quota {
+	struct super_block *super;
+	struct dq_stat *qstat;
+};
+
+/*
+ * Interface to VZ quota core
+ */
+#define INODE_QLNK(inode)	(&(inode)->i_qlnk)
+#define QLNK_INODE(qlnk)	container_of((qlnk), struct inode, i_qlnk)
+
+#define VZ_QUOTA_BAD		((struct vz_quota_master *)0xefefefef)
+
+#define VZ_QUOTAO_SETE		1
+#define VZ_QUOTAO_INIT		2
+#define VZ_QUOTAO_DESTR		3
+#define VZ_QUOTAO_SWAP		4
+#define VZ_QUOTAO_INICAL	5
+#define VZ_QUOTAO_DRCAL		6
+#define VZ_QUOTAO_QSET		7
+#define VZ_QUOTAO_TRANS		8
+#define VZ_QUOTAO_ACT		9
+#define VZ_QUOTAO_DTREE		10
+#define VZ_QUOTAO_DET		11
+#define VZ_QUOTAO_ON		12
+#define VZ_QUOTAO_RE_LOCK	13
+
+#define DQUOT_CMD_ALLOC		0
+#define DQUOT_CMD_PREALLOC	1
+#define DQUOT_CMD_CHECK		12
+#define DQUOT_CMD_FORCE		13
+
+extern struct semaphore vz_quota_sem;
+void inode_qmblk_lock(struct super_block *sb);
+void inode_qmblk_unlock(struct super_block *sb);
+void qmblk_data_read_lock(struct vz_quota_master *qmblk);
+void qmblk_data_read_unlock(struct vz_quota_master *qmblk);
+void qmblk_data_write_lock(struct vz_quota_master *qmblk);
+void qmblk_data_write_unlock(struct vz_quota_master *qmblk);
+
+/* for quota operations */
+void vzquota_inode_init_call(struct inode *inode);
+void vzquota_inode_drop_call(struct inode *inode);
+int vzquota_inode_transfer_call(struct inode *, struct iattr *);
+struct vz_quota_master *vzquota_inode_data(struct inode *inode,
+		struct vz_quota_datast *);
+void vzquota_data_unlock(struct inode *inode, struct vz_quota_datast *);
+int vzquota_rename_check(struct inode *inode,
+		struct inode *old_dir, struct inode *new_dir);
+struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode);
+/* for second-level quota */
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
+/* for management operations */
+struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
+		struct vz_quota_stat *qstat);
+void vzquota_free_master(struct vz_quota_master *);
+struct vz_quota_master *vzquota_find_master(unsigned int quota_id);
+int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
+		struct vz_quota_master *qmblk);
+int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk);
+int vzquota_get_super(struct super_block *sb);
+void vzquota_put_super(struct super_block *sb);
+
+static inline struct vz_quota_master *qmblk_get(struct vz_quota_master *qmblk)
+{
+	/* taking a reference on a block with a zero count is a bug */
+	BUG_ON(!atomic_read(&qmblk->dq_count));
+	atomic_inc(&qmblk->dq_count);
+	return qmblk;
+}
+
+static inline void __qmblk_put(struct vz_quota_master *qmblk)
+{
+	atomic_dec(&qmblk->dq_count);	/* drop a ref; never frees qmblk */
+}
+
+static inline void qmblk_put(struct vz_quota_master *qmblk)
+{
+	/* free the master block once the last reference is dropped */
+	if (atomic_dec_and_test(&qmblk->dq_count))
+		vzquota_free_master(qmblk);
+}
+
+extern struct list_head vzquota_hash_table[];
+extern int vzquota_hash_size;
+
+/*
+ * Interface to VZ UGID quota
+ */
+extern struct quotactl_ops vz_quotactl_operations;
+extern struct dquot_operations vz_quota_operations2;
+extern struct quota_format_type vz_quota_empty_v2_format;
+
+/* Select the UID tree of @qmblk for USRQUOTA, its GID tree otherwise. */
+#define QUGID_TREE(qmblk, type)	(((type) == USRQUOTA) ?		\
+		(qmblk)->dq_uid_tree : (qmblk)->dq_gid_tree)
+
+#define VZDQUG_FIND_DONT_ALLOC	1
+#define VZDQUG_FIND_FAKE	2
+struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
+		unsigned int quota_id, int type, int flags);
+struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
+		unsigned int quota_id, int type, int flags);
+struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid);
+void vzquota_put_ugid(struct vz_quota_master *qmblk,
+		struct vz_quota_ugid *qugid);
+void vzquota_kill_ugid(struct vz_quota_master *qmblk);
+int vzquota_ugid_init(void);
+void vzquota_ugid_release(void);
+int vzquota_transfer_usage(struct inode *inode, int mask,
+		struct vz_quota_ilink *qlnk);
+void vzquota_inode_off(struct inode *inode);
+
+long do_vzquotaugidctl(int cmd, unsigned int quota_id,
+		unsigned int ugid_index, unsigned int ugid_size,
+		void *addr, int compat);
+
+/*
+ * Other VZ quota parts
+ */
+extern struct dquot_operations vz_quota_operations;
+
+long do_vzquotactl(int cmd, unsigned int quota_id,
+		struct vz_quota_stat __user *qstat, const char __user *ve_root,
+		int compat);
+int vzquota_proc_init(void);
+void vzquota_proc_release(void);
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
+extern struct semaphore vz_quota_sem;
+
+void vzaquota_init(void);
+void vzaquota_fini(void);
+
+#endif /* __KERNEL__ */
+
+#endif /* _VZDQUOTA_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vzquota_qlnk.h linux-2.6.16.46-0.12-027test011/include/linux/vzquota_qlnk.h
--- linux-2.6.16.46-0.12.orig/include/linux/vzquota_qlnk.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vzquota_qlnk.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,25 @@
+/*
+ *  include/linux/vzquota_qlnk.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _VZDQUOTA_QLNK_H
+#define _VZDQUOTA_QLNK_H
+
+struct vz_quota_master;
+struct vz_quota_ugid;
+
+/* inode link, used to track inodes using quota via dq_ilink_list */
+struct vz_quota_ilink {
+	struct vz_quota_master *qmblk;
+	struct vz_quota_ugid *qugid[MAXQUOTAS];
+	struct list_head list;
+	unsigned char origin[2];
+};
+
+#endif /* _VZDQUOTA_QLNK_H */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vzratelimit.h linux-2.6.16.46-0.12-027test011/include/linux/vzratelimit.h
--- linux-2.6.16.46-0.12.orig/include/linux/vzratelimit.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vzratelimit.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,28 @@
+/*
+ *  include/linux/vzratelimit.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VZ_RATELIMIT_H__
+#define __VZ_RATELIMIT_H__
+
+/*
+ * Generic ratelimiting stuff.
+ */
+
+struct vz_rate_info {
+	int burst;
+	int interval; /* jiffy_t per event */
+	int bucket; /* kind of leaky bucket */
+	unsigned long last; /* last event */
+};
+
+/* Return true if rate limit permits. */
+int vz_ratelimit(struct vz_rate_info *p);
+
+#endif /* __VZ_RATELIMIT_H__ */
diff -upr linux-2.6.16.46-0.12.orig/include/linux/vzstat.h linux-2.6.16.46-0.12-027test011/include/linux/vzstat.h
--- linux-2.6.16.46-0.12.orig/include/linux/vzstat.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/linux/vzstat.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,182 @@
+/*
+ *  include/linux/vzstat.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __VZSTAT_H__
+#define __VZSTAT_H__
+
+struct swap_cache_info_struct {
+	unsigned long add_total;
+	unsigned long del_total;
+	unsigned long find_success;
+	unsigned long find_total;
+	unsigned long noent_race;
+	unsigned long exist_race;
+	unsigned long remove_race;
+};
+
+struct kstat_lat_snap_struct {
+	cycles_t maxlat, totlat;
+	unsigned long count;
+};
+struct kstat_lat_pcpu_snap_struct {
+	cycles_t maxlat, totlat;
+	unsigned long count;
+	seqcount_t lock;
+} ____cacheline_aligned_in_smp;
+
+struct kstat_lat_struct {
+	struct kstat_lat_snap_struct cur, last;
+	cycles_t avg[3];
+};
+struct kstat_lat_pcpu_struct {
+	struct kstat_lat_pcpu_snap_struct cur[NR_CPUS];
+	cycles_t max_snap;
+	struct kstat_lat_snap_struct last;
+	cycles_t avg[3];
+};
+
+struct kstat_perf_snap_struct {
+	cycles_t wall_tottime, cpu_tottime;
+	cycles_t wall_maxdur, cpu_maxdur;
+	unsigned long count;
+};
+struct kstat_perf_struct {
+	struct kstat_perf_snap_struct cur, last;
+};
+
+struct kstat_zone_avg {
+	unsigned long		free_pages_avg[3],
+				nr_active_avg[3],
+				nr_inactive_avg[3];
+};
+
+#define KSTAT_ALLOCSTAT_NR 5
+
+struct kernel_stat_glob {
+	unsigned long nr_unint_avg[3];
+
+	unsigned long alloc_fails[KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_pcpu_struct sched_lat;
+	struct kstat_lat_struct swap_in;
+
+	struct kstat_perf_struct ttfp, cache_reap,
+			refill_inact, shrink_icache, shrink_dcache;
+
+	struct kstat_zone_avg zone_avg[3];	/* MAX_NR_ZONES */
+} ____cacheline_aligned;
+
+extern struct kernel_stat_glob kstat_glob ____cacheline_aligned;
+extern spinlock_t kstat_glb_lock;
+
+#ifdef CONFIG_VE
+/* Declare the timing locals and sample the cycle counter and sleep_time. */
+#define KSTAT_PERF_ENTER(name)				\
+	unsigned long flags;				\
+	cycles_t start, sleep_time;			\
+	start = get_cycles();				\
+	sleep_time = VE_TASK_INFO(current)->sleep_time;
+
+/* Account wall and cpu (wall minus sleep_time delta) cycles since ENTER. */
+#define KSTAT_PERF_LEAVE(name)				\
+	spin_lock_irqsave(&kstat_glb_lock, flags);	\
+	kstat_glob.name.cur.count++;			\
+	start = get_cycles() - start;			\
+	if (kstat_glob.name.cur.wall_maxdur < start)	\
+		kstat_glob.name.cur.wall_maxdur = start;\
+	kstat_glob.name.cur.wall_tottime += start;	\
+	start -= VE_TASK_INFO(current)->sleep_time - sleep_time; \
+	if (kstat_glob.name.cur.cpu_maxdur < start)	\
+		kstat_glob.name.cur.cpu_maxdur = start;	\
+	kstat_glob.name.cur.cpu_tottime += start;	\
+	spin_unlock_irqrestore(&kstat_glb_lock, flags);
+
+#else
+#define KSTAT_PERF_ENTER(name)
+#define KSTAT_PERF_LEAVE(name)
+#endif
+
+/*
+ * Account one more latency sample in the current statistics.
+ * Serialization is the caller's responsibility.
+ */
+static inline void KSTAT_LAT_ADD(struct kstat_lat_struct *p,
+		cycles_t dur)
+{
+	p->cur.totlat += dur;
+	if (dur > p->cur.maxlat)
+		p->cur.maxlat = dur;
+	p->cur.count++;
+}
+
+static inline void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, int cpu,
+		cycles_t dur)
+{
+	struct kstat_lat_pcpu_snap_struct *slot = &p->cur[cpu];
+
+	/* update inside a seqcount write section so readers can retry */
+	write_seqcount_begin(&slot->lock);
+	slot->totlat += dur;
+	if (dur > slot->maxlat)
+		slot->maxlat = dur;
+	slot->count++;
+	write_seqcount_end(&slot->lock);
+}
+
+/*
+ * Copy cur into last, reset cur.maxlat and update the running averages.
+ * Serialization is the caller's responsibility.
+ */
+static inline void KSTAT_LAT_UPDATE(struct kstat_lat_struct *p)
+{
+	cycles_t m;
+	memcpy(&p->last, &p->cur, sizeof(p->last));
+	p->cur.maxlat = 0;
+	m = p->last.maxlat;
+	CALC_LOAD(p->avg[0], EXP_1, m);
+	CALC_LOAD(p->avg[1], EXP_5, m);
+	CALC_LOAD(p->avg[2], EXP_15, m);
+}
+
+static inline void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p)
+{
+	unsigned i, cpu;
+	struct kstat_lat_pcpu_snap_struct snap, *cur;
+	cycles_t m;
+
+	memset(&p->last, 0, sizeof(p->last));
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		cur = &p->cur[cpu];
+		do {	/* re-copy until the seqcount reports no retry */
+			i = read_seqcount_begin(&cur->lock);
+			memcpy(&snap, cur, sizeof(snap));
+		} while (read_seqcount_retry(&cur->lock, i));
+		/*
+		 * The snapshot read above and this reset of maxlat are
+		 * not atomic together, but this is OK: it happens rarely
+		 * and losing a couple of peaks is not essential. xemul
+		 */
+		cur->maxlat = 0;
+
+		p->last.count += snap.count;
+		p->last.totlat += snap.totlat;
+		if (p->last.maxlat < snap.maxlat)
+			p->last.maxlat = snap.maxlat;
+	}
+
+	m = (p->last.maxlat > p->max_snap ? p->last.maxlat : p->max_snap);
+	CALC_LOAD(p->avg[0], EXP_1, m);
+	CALC_LOAD(p->avg[1], EXP_5, m);
+	CALC_LOAD(p->avg[2], EXP_15, m);
+	/* reset max_snap so the next update starts from a fresh maximum */
+	p->max_snap = 0;
+}
+
+#endif /* __VZSTAT_H__ */
diff -upr linux-2.6.16.46-0.12.orig/include/net/addrconf.h linux-2.6.16.46-0.12-027test011/include/net/addrconf.h
--- linux-2.6.16.46-0.12.orig/include/net/addrconf.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/addrconf.h	2007-08-28 17:35:32.000000000 +0400
@@ -244,5 +244,18 @@ extern int if6_proc_init(void);
 extern void if6_proc_exit(void);
 #endif
 
+int addrconf_ifdown(struct net_device *dev, int how);
+int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen);
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+int addrconf_sysctl_init(struct ve_struct *ve);
+void addrconf_sysctl_fini(struct ve_struct *ve);
+void addrconf_sysctl_free(struct ve_struct *ve);
+#else	/* IPv6 not configured: init yields 0, fini/free are no-ops */
+#define addrconf_sysctl_init(ve)	(0)
+#define addrconf_sysctl_fini(ve)	do { } while (0)
+#define addrconf_sysctl_free(ve)	do { } while (0)
+#endif	/* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
+
 #endif
 #endif
diff -upr linux-2.6.16.46-0.12.orig/include/net/af_unix.h linux-2.6.16.46-0.12-027test011/include/net/af_unix.h
--- linux-2.6.16.46-0.12.orig/include/net/af_unix.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/af_unix.h	2007-08-28 17:35:33.000000000 +0400
@@ -9,6 +9,7 @@
 extern void unix_inflight(struct file *fp);
 extern void unix_notinflight(struct file *fp);
 extern void unix_gc(void);
+extern void unix_destruct_fds(struct sk_buff *skb);
 
 #define UNIX_HASH_SIZE	256
 
@@ -19,23 +20,37 @@ extern atomic_t unix_tot_inflight;
 
 static inline struct sock *first_unix_socket(int *i)
 {
+	struct ve_struct *ve = get_exec_env();
+	struct sock *s;
+
+	/* first socket in any chain that ve_accessible() admits for ve */
 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
+		s = sk_head(&unix_socket_table[*i]);
+		while (s != NULL && !ve_accessible(s->owner_env, ve))
+			s = sk_next(s);
+		if (s != NULL)
+			return s;
 	}
 	return NULL;
 }
 
 static inline struct sock *next_unix_socket(int *i, struct sock *s)
 {
-	struct sock *next = sk_next(s);
-	/* More in this chain? */
-	if (next)
-		return next;
+	struct ve_struct *ve = get_exec_env();
+
+	/* More accessible sockets in this chain? */
+	s = sk_next(s);
+	while (s != NULL && !ve_accessible(s->owner_env, ve))
+		s = sk_next(s);
+	if (s != NULL)
+		return s;
 	/* Look for next non-empty chain. */
 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
+		s = sk_head(&unix_socket_table[*i]);
+		while (s != NULL && !ve_accessible(s->owner_env, ve))
+			s = sk_next(s);
+		if (s != NULL)
+			return s;
 	}
 	return NULL;
 }
diff -upr linux-2.6.16.46-0.12.orig/include/net/arp.h linux-2.6.16.46-0.12-027test011/include/net/arp.h
--- linux-2.6.16.46-0.12.orig/include/net/arp.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/arp.h	2007-08-28 17:35:32.000000000 +0400
@@ -7,7 +7,16 @@
 
 #define HAVE_ARP_CREATE
 
-extern struct neigh_table arp_tbl;
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+#define arp_tbl		(*(get_exec_env()->ve_arp_tbl))	/* table reached through get_exec_env() */
+extern int ve_arp_init(struct ve_struct *ve);
+extern void ve_arp_fini(struct ve_struct *ve);
+#else	/* !(CONFIG_VE && CONFIG_INET) */
+extern struct neigh_table	global_arp_tbl;
+#define arp_tbl		global_arp_tbl	/* arp_tbl is just an alias for the global table */
+static inline int ve_arp_init(struct ve_struct *ve) { return 0; }	/* stub: always returns 0 */
+static inline void ve_arp_fini(struct ve_struct *ve) { ; }	/* stub: does nothing */
+#endif	/* CONFIG_VE && CONFIG_INET */
 
 extern void	arp_init(void);
 extern int	arp_rcv(struct sk_buff *skb, struct net_device *dev,
diff -upr linux-2.6.16.46-0.12.orig/include/net/compat.h linux-2.6.16.46-0.12-027test011/include/net/compat.h
--- linux-2.6.16.46-0.12.orig/include/net/compat.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/compat.h	2007-08-28 17:35:32.000000000 +0400
@@ -23,6 +23,16 @@ struct compat_cmsghdr {
 	compat_int_t	cmsg_type;
 };
 
+#if defined(CONFIG_X86_64)	/* is_current_32bits(): per-arch flag test on the current task */
+#define is_current_32bits() (current_thread_info()->flags & _TIF_IA32)
+#elif defined(CONFIG_IA64)
+#define is_current_32bits() (IS_IA32_PROCESS(task_pt_regs(current)))
+#elif defined(CONFIG_PPC64)
+#define is_current_32bits() (current_thread_info()->flags & _TIF_32BIT)
+#else	/* any other architecture: constant 0 */
+#define is_current_32bits()	0
+#endif	/* is_current_32bits() */
+
 #else /* defined(CONFIG_COMPAT) */
 #define compat_msghdr	msghdr		/* to avoid compiler warnings */
 #endif /* defined(CONFIG_COMPAT) */
diff -upr linux-2.6.16.46-0.12.orig/include/net/flow.h linux-2.6.16.46-0.12-027test011/include/net/flow.h
--- linux-2.6.16.46-0.12.orig/include/net/flow.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/flow.h	2007-08-28 17:35:32.000000000 +0400
@@ -10,6 +10,7 @@
 #include <linux/in6.h>
 #include <asm/atomic.h>
 
+struct ve_struct;
 struct flowi {
 	int	oif;
 	int	iif;
@@ -78,6 +79,9 @@ struct flowi {
 #define fl_icmp_type	uli_u.icmpt.type
 #define fl_icmp_code	uli_u.icmpt.code
 #define fl_ipsec_spi	uli_u.spi
+#ifdef CONFIG_VE
+	struct ve_struct *owner_env;	/* VE pointer carried in the flow key; member exists only with CONFIG_VE */
+#endif	/* CONFIG_VE */
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 #define FLOW_DIR_IN	0
diff -upr linux-2.6.16.46-0.12.orig/include/net/icmp.h linux-2.6.16.46-0.12-027test011/include/net/icmp.h
--- linux-2.6.16.46-0.12.orig/include/net/icmp.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/icmp.h	2007-08-28 17:35:32.000000000 +0400
@@ -31,9 +31,14 @@ struct icmp_err {
 
 extern struct icmp_err icmp_err_convert[];
 DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
-#define ICMP_INC_STATS(field)		SNMP_INC_STATS(icmp_statistics, field)
-#define ICMP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmp_statistics, field)
-#define ICMP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(icmp_statistics, field)
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+#define ve_icmp_statistics (get_exec_env()->_icmp_statistics)	/* counters reached through get_exec_env() */
+#else
+#define ve_icmp_statistics icmp_statistics	/* alias for the global counters */
+#endif	/* CONFIG_VE && CONFIG_INET */
+#define ICMP_INC_STATS(field)		SNMP_INC_STATS(ve_icmp_statistics, field)
+#define ICMP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(ve_icmp_statistics, field)
+#define ICMP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_icmp_statistics, field)
 
 struct dst_entry;
 struct net_proto_family;
diff -upr linux-2.6.16.46-0.12.orig/include/net/if_inet6.h linux-2.6.16.46-0.12-027test011/include/net/if_inet6.h
--- linux-2.6.16.46-0.12.orig/include/net/if_inet6.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/if_inet6.h	2007-08-28 17:35:32.000000000 +0400
@@ -194,7 +194,14 @@ struct inet6_dev 
 	unsigned long		tstamp; /* ipv6InterfaceTable update timestamp */
 };
 
-extern struct ipv6_devconf ipv6_devconf;
+extern struct ipv6_devconf global_ipv6_devconf;	/* replaces the former 'ipv6_devconf' declaration */
+extern struct ipv6_devconf global_ipv6_devconf_dflt;
+
+#ifdef CONFIG_VE
+#define ve_ipv6_devconf	(*(get_exec_env()->_ipv6_devconf))	/* dereferences a pointer held in the VE */
+#else
+#define ve_ipv6_devconf	global_ipv6_devconf	/* alias for the global instance */
+#endif	/* CONFIG_VE */
 
 static inline void ipv6_eth_mc_map(struct in6_addr *addr, char *buf)
 {
diff -upr linux-2.6.16.46-0.12.orig/include/net/inet6_hashtables.h linux-2.6.16.46-0.12-027test011/include/net/inet6_hashtables.h
--- linux-2.6.16.46-0.12.orig/include/net/inet6_hashtables.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/inet6_hashtables.h	2007-08-28 17:35:32.000000000 +0400
@@ -27,11 +27,13 @@ struct inet_hashinfo;
 
 /* I have no idea if this is a good hash for v6 or not. -DaveM */
 static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
-				const struct in6_addr *faddr, const u16 fport)
+				const struct in6_addr *faddr, const u16 fport,
+				const envid_t veid)
 {
 	unsigned int hashent = (lport ^ fport);
 
 	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
+	hashent ^= (veid ^ (veid >> 16));	/* mix both 16-bit halves of the VE id into the hash */
 	hashent ^= hashent >> 16;
 	hashent ^= hashent >> 8;
 	return hashent;
@@ -45,7 +47,7 @@ static inline int inet6_sk_ehashfn(const
 	const struct in6_addr *faddr = &np->daddr;
 	const __u16 lport = inet->num;
 	const __u16 fport = inet->dport;
-	return inet6_ehashfn(laddr, lport, faddr, fport);
+	return inet6_ehashfn(laddr, lport, faddr, fport, VEID(sk->owner_env));
 }
 
 static inline void __inet6_hash(struct inet_hashinfo *hashinfo,
@@ -94,14 +96,15 @@ static inline struct sock *
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
-	unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport);
+	struct ve_struct *env = get_exec_env();	/* feeds the hash here and the INET6_MATCH / timewait checks below */
+	unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport, VEID(env));
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
 
 	prefetch(head->chain.first);
 	read_lock(&head->lock);
 	sk_for_each(sk, node, &head->chain) {
 		/* For IPV6 do the cheaper port and family tests first. */
-		if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif))
+		if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif, env))
 			goto hit; /* You sunk my battleship! */
 	}
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
@@ -114,6 +117,7 @@ static inline struct sock *
 
 			if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	&&
 			    ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr)	&&
+			    ve_accessible_strict(tw->tw_owner_env, VEID(env)) &&
 			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
 				goto hit;
 		}
diff -upr linux-2.6.16.46-0.12.orig/include/net/inet_hashtables.h linux-2.6.16.46-0.12-027test011/include/net/inet_hashtables.h
--- linux-2.6.16.46-0.12.orig/include/net/inet_hashtables.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/inet_hashtables.h	2007-08-28 17:35:32.000000000 +0400
@@ -75,6 +75,7 @@ struct inet_ehash_bucket {
  * ports are created in O(1) time?  I thought so. ;-)	-DaveM
  */
 struct inet_bind_bucket {
+	struct ve_struct	*owner_env;	/* VE tag of this bucket; presumably set from inet_bind_bucket_create()'s env argument - TODO confirm */
 	unsigned short		port;
 	signed short		fastreuse;
 	struct hlist_node	node;
@@ -139,37 +140,43 @@ static inline struct inet_ehash_bucket *
 extern struct inet_bind_bucket *
 		    inet_bind_bucket_create(kmem_cache_t *cachep,
 					    struct inet_bind_hashbucket *head,
-					    const unsigned short snum);
+					    const unsigned short snum,
+					    struct ve_struct *env);
 extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
 				     struct inet_bind_bucket *tb);
 
-static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
+static inline int inet_bhashfn(const __u16 lport, const int bhash_size,
+		unsigned veid)	/* veid: VE id, added (hi^lo halves) to the port before masking */
 {
-	return lport & (bhash_size - 1);
+	return ((lport + (veid ^ (veid >> 16))) & (bhash_size - 1));
 }
 
 extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 			   const unsigned short snum);
 
 /* These can have wildcards, don't try too hard. */
-static inline int inet_lhashfn(const unsigned short num)
+static inline int inet_lhashfn(const unsigned short num, unsigned veid)
 {
-	return num & (INET_LHTABLE_SIZE - 1);
+	return ((num + (veid ^ (veid >> 16))) & (INET_LHTABLE_SIZE - 1));	/* port plus folded VE id, masked to the table size */
 }
 
 static inline int inet_sk_listen_hashfn(const struct sock *sk)
 {
-	return inet_lhashfn(inet_sk(sk)->num);
+	return inet_lhashfn(inet_sk(sk)->num, VEID(sk->owner_env));
 }
 
 /* Caller must disable local BH processing. */
 static inline void __inet_inherit_port(struct inet_hashinfo *table,
 				       struct sock *sk, struct sock *child)
 {
-	const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
-	struct inet_bind_hashbucket *head = &table->bhash[bhash];
+	int bhash;
+	struct inet_bind_hashbucket *head;
 	struct inet_bind_bucket *tb;
 
+	bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size,
+			VEID(child->owner_env));	/* bucket index depends on the child's port and its VE id */
+	head = &table->bhash[bhash];
+
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
 	sk_add_bind_node(child, &tb->owners);
@@ -275,7 +282,8 @@ static inline int inet_iif(const struct 
 extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
 					   const u32 daddr,
 					   const unsigned short hnum,
-					   const int dif);
+					   const int dif,
+					   struct ve_struct *env);
 
 /* Optimize the common listener case. */
 static inline struct sock *
@@ -285,18 +293,21 @@ static inline struct sock *
 {
 	struct sock *sk = NULL;
 	const struct hlist_head *head;
+	struct ve_struct *env;
 
+	env = get_exec_env();	/* used for the bucket choice here and the ve_accessible_strict()/listener lookup below */
 	read_lock(&hashinfo->lhash_lock);
-	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+	head = &hashinfo->listening_hash[inet_lhashfn(hnum, VEID(env))];
 	if (!hlist_empty(head)) {
 		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
 
 		if (inet->num == hnum && !sk->sk_node.next &&
+		    ve_accessible_strict(sk->owner_env, env) &&
 		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
 		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
 		    !sk->sk_bound_dev_if)
 			goto sherry_cache;
-		sk = __inet_lookup_listener(head, daddr, hnum, dif);
+		sk = __inet_lookup_listener(head, daddr, hnum, dif, env);
 	}
 	if (sk) {
 sherry_cache:
@@ -323,25 +334,25 @@ sherry_cache:
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
 	const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
 #endif /* __BIG_ENDIAN */
-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
+#define INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie))	&&	\
 	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
+#define INET_TW_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&	\
 	 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)	\
+#define INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 (inet_sk(__sk)->daddr		== (__saddr))		&&	\
 	 (inet_sk(__sk)->rcv_saddr	== (__daddr))		&&	\
 	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
-#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
+#define INET_TW_MATCH_ALLVE(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 (inet_twsk(__sk)->tw_daddr	== (__saddr))		&&	\
 	 (inet_twsk(__sk)->tw_rcv_saddr	== (__daddr))		&&	\
@@ -349,6 +360,18 @@ sherry_cache:
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #endif /* 64-bit arch */
 
+#define INET_MATCH(__sk, __hash, __cookie, __saddr,			\
+					__daddr, __ports, __dif, __ve)  \
+        (INET_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr),	\
+			  		(__daddr), (__ports), (__dif))	\
+	 && ve_accessible_strict((__sk)->owner_env, (__ve)))	/* ALLVE match AND strict check of the socket's owner_env against __ve */
+
+#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr,			\
+					__daddr, __ports, __dif, __ve)	\
+        (INET_TW_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr),	\
+					(__daddr), (__ports), (__dif))	\
+	 && ve_accessible_strict(inet_twsk(__sk)->tw_owner_env, VEID(__ve)))	/* timewait variant: compares VE ids (tw_owner_env vs VEID(__ve)), not pointers */
+
 /*
  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
  * not check it for lookups anymore, thanks Alexey. -DaveM
@@ -368,19 +391,25 @@ static inline struct sock *
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
-	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
-
+	unsigned int hash;
+	struct inet_ehash_bucket *head;
+	struct ve_struct *env;	/* passed to the hash and to INET_MATCH / INET_TW_MATCH below */
+
+	env = get_exec_env();
+	hash = inet_ehashfn(daddr, hnum, saddr, sport, VEID(env));	/* VE id is part of the hash key */
+	head = inet_ehash_bucket(hashinfo, hash);
 	prefetch(head->chain.first);
 	read_lock(&head->lock);
 	sk_for_each(sk, node, &head->chain) {
-		if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+		if (INET_MATCH(sk, hash, acookie, saddr, daddr,
+					ports, dif, env))
 			goto hit; /* You sunk my battleship! */
 	}
 
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
 	sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
-		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr,
+					ports, dif, env))
 			goto hit;
 	}
 	sk = NULL;
diff -upr linux-2.6.16.46-0.12.orig/include/net/inet_sock.h linux-2.6.16.46-0.12-027test011/include/net/inet_sock.h
--- linux-2.6.16.46-0.12.orig/include/net/inet_sock.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/inet_sock.h	2007-08-28 17:35:32.000000000 +0400
@@ -171,9 +171,10 @@ static inline void inet_sk_copy_descenda
 extern int inet_sk_rebuild_header(struct sock *sk);
 
 static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,
-					const __u32 faddr, const __u16 fport)
+					const __u32 faddr, const __u16 fport,
+					const envid_t veid)	/* veid is mixed in; h stays unsigned so the >> folds never sign-extend */
 {
-	unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);
+	unsigned int h = (laddr ^ lport) ^ (faddr ^ fport) ^ (veid ^ (veid >> 16));
 	h ^= h >> 16;
 	h ^= h >> 8;
 	return h;
@@ -186,8 +187,9 @@ static inline int inet_sk_ehashfn(const 
 	const __u16 lport = inet->num;
 	const __u32 faddr = inet->daddr;
 	const __u16 fport = inet->dport;
+	envid_t veid = VEID(sk->owner_env);	/* VE id taken from the socket's owner_env */
 
-	return inet_ehashfn(laddr, lport, faddr, fport);
+	return inet_ehashfn(laddr, lport, faddr, fport, veid);
 }
 
 #endif	/* _INET_SOCK_H */
diff -upr linux-2.6.16.46-0.12.orig/include/net/inet_timewait_sock.h linux-2.6.16.46-0.12-027test011/include/net/inet_timewait_sock.h
--- linux-2.6.16.46-0.12.orig/include/net/inet_timewait_sock.h	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/net/inet_timewait_sock.h	2007-08-28 17:35:32.000000000 +0400
@@ -82,6 +82,7 @@ struct inet_timewait_death_row {
 	struct inet_hashinfo 	*hashinfo;
 	int			sysctl_tw_recycle;
 	int			sysctl_max_tw_buckets;
+	int			ub_managed;
 };
 
 extern void inet_twdr_hangman(unsigned long data);
@@ -134,6 +135,7 @@ struct inet_timewait_sock {
 	unsigned long		tw_ttd;
 	struct inet_bind_bucket	*tw_tb;
 	struct hlist_node	tw_death_node;
+	envid_t			tw_owner_env;	/* a VE id, not a ve_struct pointer; INET_TW_MATCH compares it with VEID() */
 };
 
 static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
diff -upr linux-2.6.16.46-0.12.orig/include/net/ip.h linux-2.6.16.46-0.12-027test011/include/net/ip.h
--- linux-2.6.16.46-0.12.orig/include/net/ip.h	2007-08-24 19:28:06.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/net/ip.h	2007-08-28 17:35:32.000000000 +0400
@@ -153,15 +153,25 @@ struct ipv4_config
 
 extern struct ipv4_config ipv4_config;
 DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
-#define IP_INC_STATS(field)		SNMP_INC_STATS(ip_statistics, field)
-#define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ip_statistics, field)
-#define IP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ip_statistics, field)
+#ifdef CONFIG_VE	/* NOTE(review): ve_net_statistics below is guarded by CONFIG_VE && CONFIG_INET - confirm the difference is intended */
+#define ve_ip_statistics (get_exec_env()->_ip_statistics)	/* counters reached through get_exec_env() */
+#else
+#define ve_ip_statistics ip_statistics	/* alias for the global counters */
+#endif	/* CONFIG_VE */
+#define IP_INC_STATS(field)		SNMP_INC_STATS(ve_ip_statistics, field)
+#define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_ip_statistics, field)
+#define IP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_ip_statistics, field)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
-#define NET_INC_STATS(field)		SNMP_INC_STATS(net_statistics, field)
-#define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(net_statistics, field)
-#define NET_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(net_statistics, field)
-#define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(net_statistics, field, adnd)
-#define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(net_statistics, field, adnd)
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+#define ve_net_statistics (get_exec_env()->_net_statistics)	/* counters reached through get_exec_env() */
+#else
+#define ve_net_statistics net_statistics	/* alias for the global counters */
+#endif	/* CONFIG_VE && CONFIG_INET */
+#define NET_INC_STATS(field)		SNMP_INC_STATS(ve_net_statistics, field)
+#define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_net_statistics, field)
+#define NET_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_net_statistics, field)
+#define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(ve_net_statistics, field, adnd)
+#define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(ve_net_statistics, field, adnd)
 
 extern int sysctl_local_port_range[2];
 extern int sysctl_ip_default_ttl;
@@ -381,4 +391,11 @@ extern int ip_misc_proc_init(void);
 
 extern struct ctl_table ipv4_table[];
 
+#ifdef CONFIG_SYSCTL	/* handler pair for one sysctl: proc-style handler plus strategy routine */
+extern int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+			void __user *buffer, size_t *lenp, loff_t *ppos);
+extern int ipv4_sysctl_forward_strategy(ctl_table *table, int __user *name,
+			int nlen, void __user *oldval, size_t __user *oldlenp,
+			 void __user *newval, size_t newlen, void **context);
+#endif	/* CONFIG_SYSCTL */
 #endif	/* _IP_H */
diff -upr linux-2.6.16.46-0.12.orig/include/net/ip6_fib.h linux-2.6.16.46-0.12-027test011/include/net/ip6_fib.h
--- linux-2.6.16.46-0.12.orig/include/net/ip6_fib.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/ip6_fib.h	2007-08-28 17:35:32.000000000 +0400
@@ -78,6 +78,15 @@ struct rt6_info
 	u8				rt6i_protocol;
 };
 
+struct fib6_table	/* one IPv6 routing tree tagged with a VE */
+{
+	struct list_head	list;	/* list linkage; presumably onto fib6_table_list - TODO confirm */
+	struct fib6_node	root;	/* root node, embedded by value */
+	struct ve_struct	*owner_env;	/* VE this table is tagged with */
+};
+
+extern struct list_head	fib6_table_list;
+
 struct fib6_walker_t
 {
 	struct fib6_walker_t *prev, *next;
@@ -143,7 +152,7 @@ struct rt6_statistics {
 
 typedef void			(*f_pnode)(struct fib6_node *fn, void *);
 
-extern struct fib6_node		ip6_routing_table;
+extern struct fib6_node		ve0_ip6_routing_table;
 
 /*
  *	exported functions
diff -upr linux-2.6.16.46-0.12.orig/include/net/ip6_route.h linux-2.6.16.46-0.12-027test011/include/net/ip6_route.h
--- linux-2.6.16.46-0.12.orig/include/net/ip6_route.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/ip6_route.h	2007-08-28 17:35:32.000000000 +0400
@@ -139,5 +139,13 @@ static inline int ipv6_unicast_destinati
 	return rt->rt6i_flags & RTF_LOCAL;
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+int init_ve_route6(struct ve_struct *ve);
+void fini_ve_route6(struct ve_struct *ve);
+#else	/* IPv6 not configured: stubs */
+#define init_ve_route6(ve)	(0)	/* evaluates to 0; the argument is not evaluated */
+#define fini_ve_route6(ve)	do { } while (0)	/* no-op statement */
+#endif	/* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
+
 #endif
 #endif
diff -upr linux-2.6.16.46-0.12.orig/include/net/ip_fib.h linux-2.6.16.46-0.12-027test011/include/net/ip_fib.h
--- linux-2.6.16.46-0.12.orig/include/net/ip_fib.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/ip_fib.h	2007-08-28 17:35:32.000000000 +0400
@@ -168,10 +168,22 @@ struct fib_table {
 	unsigned char	tb_data[0];
 };
 
+struct fn_zone;	/* only pointers are used here, so a forward declaration suffices */
+struct fn_hash
+{
+	struct fn_zone	*fn_zones[33];	/* presumably one slot per IPv4 prefix length 0..32 - TODO confirm */
+	struct fn_zone	*fn_zone_list;	/* head pointer of a zone list; ordering not visible here */
+};
+
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ip_fib_local_table	(get_exec_env()->_local_table)	/* parenthesized like the other VE accessor macros */
+#define ip_fib_main_table	(get_exec_env()->_main_table)
+#else	/* !CONFIG_VE_NETDEV: plain global table pointers */
 extern struct fib_table *ip_fib_local_table;
 extern struct fib_table *ip_fib_main_table;
+#endif
 
 static inline struct fib_table *fib_get_table(int id)
 {
@@ -203,7 +215,12 @@ static inline void fib_select_default(co
 #define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL])
 #define ip_fib_main_table (fib_tables[RT_TABLE_MAIN])
 
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define fib_tables (get_exec_env()->_fib_tables)	/* table array reached through get_exec_env(); parenthesized expansion */
+#else	/* !CONFIG_VE_NETDEV: one global array */
 extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
+#endif
+
 extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
 extern struct fib_table *__fib_new_table(int id);
 extern void fib_rule_put(struct fib_rule *r);
@@ -250,10 +267,19 @@ extern u32  __fib_res_prefsrc(struct fib
 
 /* Exported by fib_hash.c */
 extern struct fib_table *fib_hash_init(int id);
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+struct ve_struct;	/* forward declaration for the prototypes below */
+extern int init_ve_route(struct ve_struct *ve);
+extern void fini_ve_route(struct ve_struct *ve);
+#else	/* stubs when VE or INET is not configured */
+#define init_ve_route(ve)	(0)	/* evaluates to 0; the argument is not evaluated */
+#define fini_ve_route(ve)	do { } while (0)	/* no-op statement */
+#endif	/* CONFIG_VE && CONFIG_INET */
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 /* Exported by fib_rules.c */
-
+extern int fib_rules_create(void);
+extern void fib_rules_destroy(void);
 extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
diff -upr linux-2.6.16.46-0.12.orig/include/net/ipv6.h linux-2.6.16.46-0.12-027test011/include/net/ipv6.h
--- linux-2.6.16.46-0.12.orig/include/net/ipv6.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/ipv6.h	2007-08-28 17:35:32.000000000 +0400
@@ -113,39 +113,48 @@ extern int sysctl_mld_max_msf;
 
 /* MIBs */
 DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
-#define IP6_INC_STATS(field)		SNMP_INC_STATS(ipv6_statistics, field)
-#define IP6_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ipv6_statistics, field)
-#define IP6_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ipv6_statistics, field)
+#ifdef CONFIG_VE	/* the three IPv6 MIB sets are reached through get_exec_env() */
+#define ve_ipv6_statistics (get_exec_env()->_ipv6_statistics)
+#define ve_icmpv6_statistics (get_exec_env()->_icmpv6_statistics)
+#define ve_udp_stats_in6 (get_exec_env()->_udp_stats_in6)
+#else	/* !CONFIG_VE: aliases for the global MIB sets */
+#define ve_ipv6_statistics ipv6_statistics
+#define ve_icmpv6_statistics icmpv6_statistics
+#define ve_udp_stats_in6 udp_stats_in6
+#endif	/* CONFIG_VE */
+#define IP6_INC_STATS(field)		SNMP_INC_STATS(ve_ipv6_statistics, field)
+#define IP6_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_ipv6_statistics, field)
+#define IP6_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_ipv6_statistics, field)
 DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
 #define ICMP6_INC_STATS(idev, field)		({			\
 	struct inet6_dev *_idev = (idev);				\
 	if (likely(_idev != NULL))					\
 		SNMP_INC_STATS(idev->stats.icmpv6, field); 		\
-	SNMP_INC_STATS(icmpv6_statistics, field);			\
+	SNMP_INC_STATS(ve_icmpv6_statistics, field);			\
 })
 #define ICMP6_INC_STATS_BH(idev, field)		({			\
 	struct inet6_dev *_idev = (idev);				\
 	if (likely(_idev != NULL))					\
 		SNMP_INC_STATS_BH((_idev)->stats.icmpv6, field);	\
-	SNMP_INC_STATS_BH(icmpv6_statistics, field);			\
+	SNMP_INC_STATS_BH(ve_icmpv6_statistics, field);			\
 })
 #define ICMP6_INC_STATS_USER(idev, field) 	({			\
 	struct inet6_dev *_idev = (idev);				\
 	if (likely(_idev != NULL))					\
 		SNMP_INC_STATS_USER(_idev->stats.icmpv6, field);	\
-	SNMP_INC_STATS_USER(icmpv6_statistics, field);			\
+	SNMP_INC_STATS_USER(ve_icmpv6_statistics, field);			\
 })
 #define ICMP6_INC_STATS_OFFSET_BH(idev, field, offset)	({			\
 	struct inet6_dev *_idev = idev;						\
 	__typeof__(offset) _offset = (offset);					\
 	if (likely(_idev != NULL))						\
 		SNMP_INC_STATS_OFFSET_BH(_idev->stats.icmpv6, field, _offset);	\
-	SNMP_INC_STATS_OFFSET_BH(icmpv6_statistics, field, _offset);    	\
+	SNMP_INC_STATS_OFFSET_BH(ve_icmpv6_statistics, field, _offset);    	\
 })
 DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
-#define UDP6_INC_STATS(field)		SNMP_INC_STATS(udp_stats_in6, field)
-#define UDP6_INC_STATS_BH(field)	SNMP_INC_STATS_BH(udp_stats_in6, field)
-#define UDP6_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(udp_stats_in6, field)
+#define UDP6_INC_STATS(field)		SNMP_INC_STATS(ve_udp_stats_in6, field)
+#define UDP6_INC_STATS_BH(field)	SNMP_INC_STATS_BH(ve_udp_stats_in6, field)
+#define UDP6_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_udp_stats_in6, field)
 
 int snmp6_register_dev(struct inet6_dev *idev);
 int snmp6_unregister_dev(struct inet6_dev *idev);
@@ -154,6 +163,14 @@ int snmp6_free_dev(struct inet6_dev *ide
 int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
 void snmp6_mib_free(void *ptr[2]);
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+int ve_snmp_proc_init(void);
+void ve_snmp_proc_fini(void);
+#else	/* IPv6 not configured: argument-less stubs */
+#define ve_snmp_proc_init()	(0)	/* evaluates to 0 */
+#define ve_snmp_proc_fini()	do { } while (0)	/* no-op statement */
+#endif	/* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
+
 struct ip6_ra_chain
 {
 	struct ip6_ra_chain	*next;
diff -upr linux-2.6.16.46-0.12.orig/include/net/ndisc.h linux-2.6.16.46-0.12-027test011/include/net/ndisc.h
--- linux-2.6.16.46-0.12.orig/include/net/ndisc.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/ndisc.h	2007-08-28 17:35:32.000000000 +0400
@@ -50,7 +50,20 @@ struct net_device;
 struct net_proto_family;
 struct sk_buff;
 
-extern struct neigh_table nd_tbl;
+#ifdef CONFIG_VE
+#define nd_tbl		(*(get_exec_env()->ve_nd_tbl))	/* dereferences a table pointer held in the VE */
+#else
+#define nd_tbl		global_nd_tbl	/* alias for the global table */
+extern struct neigh_table global_nd_tbl;
+#endif	/* CONFIG_VE */
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+extern int ve_ndisc_init(struct ve_struct *ve);
+extern void ve_ndisc_fini(struct ve_struct *ve);
+#else	/* IPv6 not configured: stubs */
+#define ve_ndisc_init(ve)	(0)	/* evaluates to 0; the argument is not evaluated */
+#define ve_ndisc_fini(ve)	do { } while (0)	/* no-op statement */
+#endif	/* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
 
 struct nd_msg {
         struct icmp6hdr	icmph;
@@ -128,6 +141,7 @@ extern int 			ndisc_ifinfo_sysctl_change
 extern void 			inet6_ifinfo_notify(int event,
 						    struct inet6_dev *idev);
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static inline struct neighbour * ndisc_get_neigh(struct net_device *dev, struct in6_addr *addr)
 {
 
@@ -136,6 +150,7 @@ static inline struct neighbour * ndisc_g
 
 	return NULL;
 }
+#endif
 
 
 #endif /* __KERNEL__ */
diff -upr linux-2.6.16.46-0.12.orig/include/net/neighbour.h linux-2.6.16.46-0.12-027test011/include/net/neighbour.h
--- linux-2.6.16.46-0.12.orig/include/net/neighbour.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/neighbour.h	2007-08-28 17:35:32.000000000 +0400
@@ -191,6 +191,8 @@ struct neigh_table
 	atomic_t		entries;
 	rwlock_t		lock;
 	unsigned long		last_rand;
+	struct ve_struct	*owner_env;	/* VE tag of this table; who sets it is not visible in this header */
+	struct user_beancounter *owner_ub;	/* beancounter tag; presumably used for accounting - TODO confirm */
 	kmem_cache_t		*kmem_cachep;
 	struct neigh_statistics	*stats;
 	struct neighbour	**hash_buckets;
@@ -210,7 +212,7 @@ struct neigh_table
 #define NEIGH_UPDATE_F_ISROUTER			0x40000000
 #define NEIGH_UPDATE_F_ADMIN			0x80000000
 
-extern void			neigh_table_init(struct neigh_table *tbl);
+extern int			neigh_table_init(struct neigh_table *tbl);
 extern int			neigh_table_clear(struct neigh_table *tbl);
 extern struct neighbour *	neigh_lookup(struct neigh_table *tbl,
 					     const void *pkey,
diff -upr linux-2.6.16.46-0.12.orig/include/net/netlink_sock.h linux-2.6.16.46-0.12-027test011/include/net/netlink_sock.h
--- linux-2.6.16.46-0.12.orig/include/net/netlink_sock.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/net/netlink_sock.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,22 @@
+#ifndef __NET_NETLINK_SOCK_H
+#define __NET_NETLINK_SOCK_H
+
+struct netlink_sock {	/* NOTE(review): no #includes here; the includer must already provide struct sock, u32, wait_queue_head_t, spinlock_t */
+	/* struct sock has to be the first member of netlink_sock */
+	struct sock		sk;
+	u32			pid;
+	u32			dst_pid;
+	u32			dst_group;
+	u32			flags;
+	u32			subscriptions;
+	u32			ngroups;	/* presumably the size of the 'groups' bitmap - TODO confirm */
+	unsigned long		*groups;
+	unsigned long		state;
+	wait_queue_head_t	wait;
+	struct netlink_callback	*cb;
+	spinlock_t		cb_lock;	/* presumably protects 'cb' - TODO confirm */
+	void			(*data_ready)(struct sock *sk, int bytes);	/* callback taking the socket and a byte count */
+	struct module		*module;
+};
+
+#endif /* __NET_NETLINK_SOCK_H */
diff -upr linux-2.6.16.46-0.12.orig/include/net/route.h linux-2.6.16.46-0.12-027test011/include/net/route.h
--- linux-2.6.16.46-0.12.orig/include/net/route.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/route.h	2007-08-28 17:35:32.000000000 +0400
@@ -138,6 +138,7 @@ static inline void ip_rt_put(struct rtab
 #define IPTOS_RT_MASK	(IPTOS_TOS_MASK & ~3)
 
 extern __u8 ip_tos2prio[16];
+extern int ip_rt_src_check;
 
 static inline char rt_tos2priority(u8 tos)
 {
@@ -201,4 +202,14 @@ static inline struct inet_peer *rt_get_p
 
 extern ctl_table ipv4_route_table[];
 
+#ifdef CONFIG_SYSCTL	/* flush-sysctl handler pair and the variable they share */
+extern int ipv4_flush_delay;
+extern int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+		struct file *filp, void __user *buffer,	size_t *lenp,
+		loff_t *ppos);
+extern int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
+		int __user *name, int nlen, void __user *oldval,
+		size_t __user *oldlenp,	void __user *newval,
+		size_t newlen, void **context);
+#endif	/* CONFIG_SYSCTL */
 #endif	/* _ROUTE_H */
diff -upr linux-2.6.16.46-0.12.orig/include/net/scm.h linux-2.6.16.46-0.12-027test011/include/net/scm.h
--- linux-2.6.16.46-0.12.orig/include/net/scm.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/scm.h	2007-08-28 17:35:31.000000000 +0400
@@ -40,7 +40,7 @@ static __inline__ int scm_send(struct so
 	memset(scm, 0, sizeof(*scm));
 	scm->creds.uid = current->uid;
 	scm->creds.gid = current->gid;
-	scm->creds.pid = current->tgid;
+	scm->creds.pid = virt_tgid(current);	/* replaces current->tgid; presumably the tgid as seen inside the VE - TODO confirm */
 	if (msg->msg_controllen <= 0)
 		return 0;
 	return __scm_send(sock, msg, scm);
diff -upr linux-2.6.16.46-0.12.orig/include/net/sock.h linux-2.6.16.46-0.12-027test011/include/net/sock.h
--- linux-2.6.16.46-0.12.orig/include/net/sock.h	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/net/sock.h	2007-08-28 17:35:32.000000000 +0400
@@ -55,6 +55,8 @@
 #include <net/dst.h>
 #include <net/checksum.h>
 
+#include <ub/ub_net.h>
+
 /*
  * This structure really needs to be cleaned up.
  * Most of it is for TCP, and not used by any of
@@ -253,6 +255,8 @@ struct sock {
   	int			(*sk_backlog_rcv)(struct sock *sk,
 						  struct sk_buff *skb);  
 	void                    (*sk_destruct)(struct sock *sk);
+	struct sock_beancounter sk_bc;	/* per-socket beancounter state, embedded by value */
+	struct ve_struct	*owner_env;	/* VE tag; read as sk->owner_env by the VE-aware hash and match helpers */
 };
 
 /*
@@ -487,6 +491,8 @@ static inline void sk_add_backlog(struct
 })
 
 extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
+extern int __sk_stream_wait_memory(struct sock *sk, long *timeo_p,
+				unsigned long amount);
 extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
 extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
 extern int sk_stream_error(struct sock *sk, int flags, int err);
@@ -708,8 +714,11 @@ static inline void sk_stream_writequeue_
 
 static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
 {
-	return (int)skb->truesize <= sk->sk_forward_alloc ||
-		sk_stream_mem_schedule(sk, skb->truesize, 1);
+	if ((int)skb->truesize > sk->sk_forward_alloc &&
+		!sk_stream_mem_schedule(sk, skb->truesize, 1))
+		/* The mainline memory accounting already refuses this skb. */
+		return 0;
+	return ub_tcprcvbuf_charge(sk, skb) == 0;	/* otherwise succeed only if the beancounter charge returns 0 */
 }
 
 static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
@@ -767,6 +776,11 @@ extern struct sk_buff 		*sock_alloc_send
 						     unsigned long size,
 						     int noblock,
 						     int *errcode);
+extern struct sk_buff 		*sock_alloc_send_skb2(struct sock *sk,
+						     unsigned long size,
+						     unsigned long size2,
+						     int noblock,
+						     int *errcode);
 extern void *sock_kmalloc(struct sock *sk, int size,
 			  gfp_t priority);
 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
@@ -1128,6 +1142,7 @@ static inline int skb_copy_to_page(struc
 
 static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 {
+	WARN_ON(skb->destructor);
 	sock_hold(sk);
 	skb->sk = sk;
 	skb->destructor = sock_wfree;
@@ -1136,6 +1151,7 @@ static inline void skb_set_owner_w(struc
 
 static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
+	WARN_ON(skb->destructor);
 	skb->sk = sk;
 	skb->destructor = sock_rfree;
 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
@@ -1160,6 +1176,10 @@ static inline int sock_queue_rcv_skb(str
 		goto out;
 	}
 
+	err = ub_sockrcvbuf_charge(sk, skb);
+	if (err < 0)
+		goto out;
+
 	/* It would be deadlock, if sock_queue_rcv_skb is used
 	   with socket lock! We assume that users of this
 	   function are lock free.
diff -upr linux-2.6.16.46-0.12.orig/include/net/tcp.h linux-2.6.16.46-0.12-027test011/include/net/tcp.h
--- linux-2.6.16.46-0.12.orig/include/net/tcp.h	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/net/tcp.h	2007-08-28 17:35:33.000000000 +0400
@@ -41,6 +41,13 @@
 #include <net/tcp_states.h>
 
 #include <linux/seq_file.h>
+#include <ub/ub_net.h>
+
+#define TCP_PAGE(sk)	((sk)->sk_sndmsg_page)	/* sk parenthesized so any pointer expression is a safe argument */
+#define TCP_OFF(sk)	((sk)->sk_sndmsg_off)	/* likewise for the sk_sndmsg_off member */
+
+#define TW_WSCALE_MASK		0x0f
+#define TW_WSCALE_SPEC		0x10
 
 extern struct inet_hashinfo tcp_hashinfo;
 
@@ -212,7 +219,9 @@ extern int sysctl_tcp_mem[3];
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
 extern int sysctl_tcp_app_win;
+#ifndef sysctl_tcp_adv_win_scale
 extern int sysctl_tcp_adv_win_scale;
+#endif
 extern int sysctl_tcp_tw_reuse;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_low_latency;
@@ -221,6 +230,9 @@ extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
 extern int sysctl_tcp_abc;
+extern int sysctl_tcp_max_tw_kmem_fraction;
+extern int sysctl_tcp_max_tw_buckets_ub;
+
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
@@ -252,12 +264,17 @@ static inline int between(__u32 seq1, __
 extern struct proto tcp_prot;
 
 DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
-#define TCP_INC_STATS(field)		SNMP_INC_STATS(tcp_statistics, field)
-#define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(tcp_statistics, field)
-#define TCP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(tcp_statistics, field)
-#define TCP_DEC_STATS(field)		SNMP_DEC_STATS(tcp_statistics, field)
-#define TCP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(tcp_statistics, field, val)
-#define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(tcp_statistics, field, val)
+#if defined(CONFIG_VE) && defined(CONFIG_INET)
+#define ve_tcp_statistics (get_exec_env()->_tcp_statistics)
+#else
+#define ve_tcp_statistics tcp_statistics
+#endif
+#define TCP_INC_STATS(field)		SNMP_INC_STATS(ve_tcp_statistics, field)
+#define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_tcp_statistics, field)
+#define TCP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_tcp_statistics, field)
+#define TCP_DEC_STATS(field)		SNMP_DEC_STATS(ve_tcp_statistics, field)
+#define TCP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(ve_tcp_statistics, field, val)
+#define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(ve_tcp_statistics, field, val)
 
 extern void			tcp_v4_err(struct sk_buff *skb, u32);
 
@@ -497,7 +514,7 @@ extern u32	__tcp_select_window(struct so
  * to use only the low 32-bits of jiffies and hide the ugly
  * casts with the following macro.
  */
-#define tcp_time_stamp		((__u32)(jiffies))
+#define tcp_time_stamp		((__u32)(jiffies + get_exec_env()->jiffies_fixup))
 
 /* This is what the send packet queuing engine uses to pass
  * TCP per-packet control information to the transmission
diff -upr linux-2.6.16.46-0.12.orig/include/net/udp.h linux-2.6.16.46-0.12-027test011/include/net/udp.h
--- linux-2.6.16.46-0.12.orig/include/net/udp.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/include/net/udp.h	2007-08-28 17:35:32.000000000 +0400
@@ -39,13 +39,19 @@ extern rwlock_t udp_hash_lock;
 
 extern int udp_port_rover;
 
-static inline int udp_lport_inuse(u16 num)
+static inline int udp_hashfn(u16 num, unsigned veid)
+{
+	return ((num + (veid ^ (veid >> 16))) & (UDP_HTABLE_SIZE - 1));
+}
+
+static inline int udp_lport_inuse(u16 num, struct ve_struct *env)
 {
 	struct sock *sk;
 	struct hlist_node *node;
 
-	sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
-		if (inet_sk(sk)->num == num)
+	sk_for_each(sk, node, &udp_hash[udp_hashfn(num, VEID(env))])
+		if (inet_sk(sk)->num == num &&
+		    ve_accessible_strict(sk->owner_env, env))
 			return 1;
 	return 0;
 }
@@ -75,9 +81,14 @@ extern unsigned int udp_poll(struct file
 			     poll_table *wait);
 
 DECLARE_SNMP_STAT(struct udp_mib, udp_statistics);
-#define UDP_INC_STATS(field)		SNMP_INC_STATS(udp_statistics, field)
-#define UDP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(udp_statistics, field)
-#define UDP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(udp_statistics, field)
+#ifdef CONFIG_VE
+#define ve_udp_statistics (get_exec_env()->_udp_statistics)
+#else
+#define ve_udp_statistics udp_statistics
+#endif
+#define UDP_INC_STATS(field)		SNMP_INC_STATS(ve_udp_statistics, field)
+#define UDP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ve_udp_statistics, field)
+#define UDP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(ve_udp_statistics, field)
 
 /* /proc */
 struct udp_seq_afinfo {
diff -upr linux-2.6.16.46-0.12.orig/include/ub/beancounter.h linux-2.6.16.46-0.12-027test011/include/ub/beancounter.h
--- linux-2.6.16.46-0.12.orig/include/ub/beancounter.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/beancounter.h	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,450 @@
+/*
+ *  include/ub/beancounter.h
+ *
+ *  Copyright (C) 1999-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  Andrey Savochkin	saw@sw-soft.com
+ *
+ */
+
+#ifndef _LINUX_BEANCOUNTER_H
+#define _LINUX_BEANCOUNTER_H
+
+/*
+ * Generic ratelimiting stuff.
+ */
+
+struct ub_rate_info {
+	int burst;
+	int interval; /* jiffy_t per event */
+	int bucket; /* kind of leaky bucket */
+	unsigned long last; /* last event */
+};
+
+/* Return true if rate limit permits. */
+int ub_ratelimit(struct ub_rate_info *);
+
+
+/*
+ * This magic is used to distinguish user beancounter and pages beancounter
+ * in struct page. page_ub and page_bc are placed in union and MAGIC
+ * ensures us that we don't use pbc as ubc in ub_page_uncharge().
+ */
+#define UB_MAGIC		0x62756275
+
+/*
+ *	Resource list.
+ */
+
+#define UB_KMEMSIZE	0	/* Unswappable kernel memory size including
+				 * struct task, page directories, etc.
+				 */
+#define UB_LOCKEDPAGES	1	/* Mlock()ed pages. */
+#define UB_PRIVVMPAGES	2	/* Total number of pages, counting potentially
+				 * private pages as private and used.
+				 */
+#define UB_SHMPAGES	3	/* IPC SHM segment size. */
+#define UB_DUMMY	4	/* Dummy resource (compatibility) */
+#define UB_NUMPROC	5	/* Number of processes. */
+#define UB_PHYSPAGES	6	/* All resident pages, for swapout guarantee. */
+#define UB_VMGUARPAGES	7	/* Guarantee for memory allocation,
+				 * checked against PRIVVMPAGES.
+				 */
+#define UB_OOMGUARPAGES	8	/* Guarantees against OOM kill.
+				 * Only limit is used, no accounting.
+				 */
+#define UB_NUMTCPSOCK	9	/* Number of TCP sockets. */
+#define UB_NUMFLOCK	10	/* Number of file locks. */
+#define UB_NUMPTY	11	/* Number of PTYs. */
+#define UB_NUMSIGINFO	12	/* Number of siginfos. */
+#define UB_TCPSNDBUF	13	/* Total size of tcp send buffers. */
+#define UB_TCPRCVBUF	14	/* Total size of tcp receive buffers. */
+#define UB_OTHERSOCKBUF	15	/* Total size of other socket
+				 * send buffers (all buffers for PF_UNIX).
+				 */
+#define UB_DGRAMRCVBUF	16	/* Total size of other socket
+				 * receive buffers.
+				 */
+#define UB_NUMOTHERSOCK	17	/* Number of other sockets. */
+#define UB_DCACHESIZE	18	/* Size of busy dentry/inode cache. */
+#define UB_NUMFILE	19	/* Number of open files. */
+
+#define UB_RESOURCES_COMPAT	24
+
+/* Add new resources here */
+
+#define UB_NUMXTENT	23
+#define UB_RESOURCES	24
+
+#define UB_UNUSEDPRIVVM	(UB_RESOURCES + 0)
+#define UB_TMPFSPAGES	(UB_RESOURCES + 1)
+#define UB_SWAPPAGES	(UB_RESOURCES + 2)
+#define UB_HELDPAGES	(UB_RESOURCES + 3)
+
+struct ubparm {
+	/*
+	 * A barrier over which resource allocations are failed gracefully.
+	 * If the amount of consumed memory is over the barrier further sbrk()
+	 * or mmap() calls fail, the existing processes are not killed.
+	 */
+	unsigned long	barrier;
+	/* hard resource limit */
+	unsigned long	limit;
+	/* consumed resources */
+	unsigned long	held;
+	/* maximum amount of consumed resources through the last period */
+	unsigned long	maxheld;
+	/* minimum amount of consumed resources through the last period */
+	unsigned long	minheld;
+	/* count of failed charges */
+	unsigned long	failcnt;
+};
+
+/*
+ * Kernel internal part.
+ */
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/threads.h>
+#include <linux/percpu.h>
+#include <ub/ub_debug.h>
+#include <ub/ub_decl.h>
+#include <asm/atomic.h>
+
+/*
+ * UB_MAXVALUE is essentially LONG_MAX declared in a cross-compiling safe form.
+ */
+#define UB_MAXVALUE	( (1UL << (sizeof(unsigned long)*8-1)) - 1)
+
+
+/*
+ *	Resource management structures
+ * Serialization issues:
+ *   beancounter list management is protected via ub_hash_lock
+ *   task pointers are set only for current task and only once
+ *   refcount is managed atomically
+ *   value and limit comparison and change are protected by per-ub spinlock
+ */
+
+struct page_beancounter;
+struct task_beancounter;
+struct sock_beancounter;
+
+struct page_private {
+	unsigned long		ubp_unused_privvmpages;
+	unsigned long		ubp_tmpfs_respages;
+	unsigned long		ubp_swap_pages;
+	unsigned long long	ubp_held_pages;
+};
+
+struct sock_private {
+	unsigned long		ubp_rmem_thres;
+	unsigned long		ubp_wmem_pressure;
+	unsigned long		ubp_maxadvmss;
+	unsigned long		ubp_rmem_pressure;
+	int			ubp_tw_count;
+#define UB_RMEM_EXPAND          0
+#define UB_RMEM_KEEP            1
+#define UB_RMEM_SHRINK          2
+	struct list_head	ubp_other_socks;
+	struct list_head	ubp_tcp_socks;
+	atomic_t		ubp_orphan_count;
+};
+
+struct ub_percpu_struct {
+	unsigned long unmap;
+	unsigned long swapin;
+#ifdef CONFIG_UBC_IO_ACCT
+	unsigned long long bytes_wrote;
+	unsigned long long bytes_read;
+	unsigned long long bytes_cancelled;
+#endif
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	long	pages_charged;
+	long	vmalloc_charged;
+	long	pbcs;
+#endif
+	unsigned long	sync;
+	unsigned long	sync_done;
+
+	unsigned long	fsync;
+	unsigned long	fsync_done;
+
+	unsigned long	fdsync;
+	unsigned long	fdsync_done;
+
+	unsigned long	frsync;
+	unsigned long	frsync_done;
+
+	unsigned long		write;
+	unsigned long		read;
+	unsigned long long	wchar;
+	unsigned long long	rchar;
+};
+
+struct user_beancounter
+{
+	unsigned long		ub_magic;
+	atomic_t		ub_refcount;
+	struct list_head	ub_list;
+	struct hlist_node	ub_hash;
+
+	union {
+		struct rcu_head rcu;
+		struct execute_work cleanup;
+	};
+
+	spinlock_t		ub_lock;
+	uid_t			ub_uid;
+
+	struct ub_rate_info	ub_limit_rl;
+	int			ub_oom_noproc;
+
+	struct page_private	ppriv;
+#define ub_unused_privvmpages	ppriv.ubp_unused_privvmpages
+#define ub_tmpfs_respages	ppriv.ubp_tmpfs_respages
+#define ub_swap_pages		ppriv.ubp_swap_pages
+#define ub_held_pages		ppriv.ubp_held_pages
+	struct sock_private	spriv;
+#define ub_rmem_thres		spriv.ubp_rmem_thres
+#define ub_maxadvmss		spriv.ubp_maxadvmss
+#define ub_rmem_pressure	spriv.ubp_rmem_pressure
+#define ub_wmem_pressure	spriv.ubp_wmem_pressure
+#define ub_tcp_sk_list		spriv.ubp_tcp_socks
+#define ub_other_sk_list	spriv.ubp_other_socks
+#define ub_orphan_count		spriv.ubp_orphan_count
+#define ub_tw_count		spriv.ubp_tw_count
+
+	struct user_beancounter *parent;
+	void			*private_data;
+	unsigned long		ub_aflags;
+
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry	*proc;
+#endif
+
+	/* resources statistic and settings */
+	struct ubparm		ub_parms[UB_RESOURCES];
+	/* resources statistic for last interval */
+	struct ubparm		ub_store[UB_RESOURCES];
+
+	struct ub_percpu_struct	*ub_percpu;
+#ifdef CONFIG_UBC_IO_ACCT
+	/* these are protected with pb_lock */
+	unsigned long long	bytes_wrote;
+	unsigned long long	bytes_dirtied;
+	unsigned long long	bytes_dirty_missed;
+	unsigned long		io_pb_held;
+#endif
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	struct list_head	ub_cclist;
+#endif
+};
+
+enum ub_severity { UB_HARD, UB_SOFT, UB_FORCE };
+
+#define UB_AFLAG_NOTIF_PAGEIN	0
+
+/* Follow ->parent links up from ub; returns the beancounter whose parent is NULL. */
+static inline struct user_beancounter *top_beancounter(struct user_beancounter *ub)
+{
+	for (; ub->parent != NULL; ub = ub->parent)
+		;	/* all the work happens in the loop header */
+	return ub;
+}
+
+static inline int ub_barrier_hit(struct user_beancounter *ub, int resource)
+{
+	return ub->ub_parms[resource].held > ub->ub_parms[resource].barrier;
+}
+
+static inline int ub_hfbarrier_hit(struct user_beancounter *ub, int resource)
+{
+	const struct ubparm *parm = &ub->ub_parms[resource];
+	return parm->held > (parm->barrier >> 1);	/* strictly above half of the barrier */
+}
+
+/* Nonzero while held is no more than one eighth (barrier >> 3) of the barrier. */
+static inline int ub_barrier_farnr(struct user_beancounter *ub, int resource)
+{
+	const struct ubparm *parm = &ub->ub_parms[resource];
+	return parm->held <= (parm->barrier >> 3);
+}
+
+/* Same held <= barrier/8 test as ub_barrier_farnr(), plus barrier >= 1024 * 1024. */
+static inline int ub_barrier_farsz(struct user_beancounter *ub, int resource)
+{
+	const struct ubparm *parm = &ub->ub_parms[resource];
+	return parm->held <= (parm->barrier >> 3) && parm->barrier >= 1024 * 1024;
+}
+
+#ifndef CONFIG_USER_RESOURCE
+
+#define ub_percpu_add(ub, f, v)	do { } while (0)
+#define ub_percpu_sub(ub, f, v)	do { } while (0)
+#define ub_percpu_inc(ub, f)	do { } while (0)
+#define ub_percpu_dec(ub, f)	do { } while (0)
+
+#define mm_ub(mm)	(NULL)
+
+static inline struct user_beancounter *get_beancounter_byuid
+		(uid_t uid, int create) { return NULL; }	/* stub: always NULL */
+static inline struct user_beancounter *get_beancounter
+		(struct user_beancounter *ub) { return NULL; }	/* stub: always NULL */
+static inline void put_beancounter(struct user_beancounter *ub) { }	/* stub: no-op */
+/* Init hooks compile to nothing when CONFIG_USER_RESOURCE is not defined. */
+static inline void ub_init_late(void) { }
+static inline void ub_init_ub0(void) { }
+
+static inline int charge_beancounter(struct user_beancounter *ub,
+			int resource, unsigned long val,
+			enum ub_severity strict) { return 0; }
+static inline void uncharge_beancounter(struct user_beancounter *ub,
+			int resource, unsigned long val) { }
+
+#else /* CONFIG_USER_RESOURCE */
+
+#define ub_percpu_add(ub, field, v)		do {			\
+		per_cpu_ptr((ub)->ub_percpu, get_cpu())->field += (v);	\
+		put_cpu();	/* pairs with get_cpu() above */	\
+	} while (0)
+#define ub_percpu_inc(ub, field) ub_percpu_add(ub, field, 1)	/* add 1 to this CPU's field */
+
+#define ub_percpu_sub(ub, field, v)		do {			\
+		per_cpu_ptr((ub)->ub_percpu, get_cpu())->field -= (v);	\
+		put_cpu();	/* pairs with get_cpu() above */	\
+	} while (0)
+#define ub_percpu_dec(ub, field) ub_percpu_sub(ub, field, 1)	/* subtract 1 from this CPU's field */
+
+#define mm_ub(mm)	((mm)->mm_ub)
+/*
+ *  Charge/uncharge operations
+ */
+
+extern int __charge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val, enum ub_severity strict);
+
+extern void __uncharge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val);
+
+extern void put_beancounter_safe(struct user_beancounter *ub);
+extern void __put_beancounter(struct user_beancounter *ub);
+
+extern void uncharge_warn(struct user_beancounter *ub, int resource,
+		unsigned long val, unsigned long held);
+
+extern const char *ub_rnames[];
+/*
+ *	Put a beancounter reference
+ */
+
+static inline void put_beancounter(struct user_beancounter *ub)
+{
+	/* NULL is tolerated: there is nothing to release in that case */
+	if (likely(ub != NULL)) {
+		/* FIXME - optimize not to disable interrupts and make call */
+		__put_beancounter(ub);
+	}
+}
+
+/* fast put, refcount can't reach zero */
+static inline void __put_beancounter_batch(struct user_beancounter *ub, int n)
+{
+	atomic_sub(n, &ub->ub_refcount);
+}
+
+static inline void put_beancounter_batch(struct user_beancounter *ub, int n)
+{
+	if (n > 1)	/* drop n - 1 references with a plain atomic_sub() */
+		__put_beancounter_batch(ub, n - 1);
+	__put_beancounter(ub);	/* last one goes through __put_beancounter(); runs for any n, even n <= 0 */
+}
+
+/*
+ *	Create a new beancounter reference
+ */
+extern struct user_beancounter *get_beancounter_byuid(uid_t uid, int create);
+
+/* Take one reference on ub and return it; a NULL ub is returned untouched. */
+static inline
+struct user_beancounter *get_beancounter(struct user_beancounter *ub)
+{
+	if (likely(ub != NULL))
+		atomic_inc(&ub->ub_refcount);
+
+	return ub;
+}
+
+static inline 
+struct user_beancounter *get_beancounter_rcu(struct user_beancounter *ub)
+{
+	return atomic_inc_not_zero(&ub->ub_refcount) ? ub : NULL;
+}
+
+static inline void get_beancounter_batch(struct user_beancounter *ub, int n)
+{
+	atomic_add(n, &ub->ub_refcount);
+}
+
+extern struct user_beancounter *get_subbeancounter_byid(
+		struct user_beancounter *,
+		int id, int create);
+
+extern void ub_init_late(void);
+extern void ub_init_ub0(void);
+
+extern int print_ub_uid(struct user_beancounter *ub, char *buf, int size);
+
+/*
+ *	Resource charging
+ * Change user's account and compare against limits
+ */
+
+static inline void ub_adjust_maxheld(struct user_beancounter *ub, int resource)
+{
+	if (ub->ub_parms[resource].maxheld < ub->ub_parms[resource].held)	/* held exceeds recorded maximum: raise it */
+		ub->ub_parms[resource].maxheld = ub->ub_parms[resource].held;
+	if (ub->ub_parms[resource].minheld > ub->ub_parms[resource].held)	/* held is below recorded minimum: lower it */
+		ub->ub_parms[resource].minheld = ub->ub_parms[resource].held;
+}
+
+int charge_beancounter(struct user_beancounter *ub, int resource,
+		unsigned long val, enum ub_severity strict);
+void uncharge_beancounter(struct user_beancounter *ub, int resource,
+		unsigned long val);
+void __charge_beancounter_notop(struct user_beancounter *ub, int resource,
+		unsigned long val);
+void __uncharge_beancounter_notop(struct user_beancounter *ub, int resource,
+		unsigned long val);
+
+static inline void charge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	if (ub->parent != NULL)
+		__charge_beancounter_notop(ub, resource, val);
+}
+
+static inline void uncharge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	if (ub->parent != NULL)
+		__uncharge_beancounter_notop(ub, resource, val);
+}
+
+#endif /* CONFIG_USER_RESOURCE */
+
+#ifndef CONFIG_USER_RSS_ACCOUNTING
+static inline void ub_init_pbc(void) { }	/* no-op stand-in for the extern declared in the #else branch */
+#else
+extern void ub_init_pbc(void);
+#endif
+#endif /* __KERNEL__ */
+#endif /* _LINUX_BEANCOUNTER_H */
diff -upr linux-2.6.16.46-0.12.orig/include/ub/disk_io.h linux-2.6.16.46-0.12-027test011/include/ub/disk_io.h
--- linux-2.6.16.46-0.12.orig/include/ub/disk_io.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/disk_io.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,67 @@
+/*
+ *  include/ub/disk_io.h
+ *
+ *  Copyright (C) 1999-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  Pavel Emelianov <xemul@openvz.org>
+ *
+ */
+
+#ifndef __UB_DISK_IO_H_
+#define __UB_DISK_IO_H_
+
+#ifdef CONFIG_USER_RESOURCE
+#include <ub/beancounter.h>
+static inline void ub_io_account_read(struct task_struct *tsk, size_t bytes)
+{
+	struct user_beancounter *ub;
+
+	ub = tsk->task_bc.exec_ub;	/* beancounter taken from tsk->task_bc.exec_ub, not NULL-checked */
+	ub->ub_dio.read_bytes += bytes;	/* NOTE(review): struct user_beancounter in ub/beancounter.h shows no ub_dio member -- confirm this header still builds */
+}
+
+static inline void ub_io_account_write_request(struct task_struct *tsk,
+		size_t bytes)
+{
+	struct user_beancounter *ub;
+
+	ub = tsk->task_bc.exec_ub;
+	ub->ub_dio.write_bytes += bytes;
+}
+
+static inline void ub_io_account_write_cancelled(struct task_struct *tsk,
+		size_t bytes)
+{
+	struct user_beancounter *ub;
+
+	ub = tsk->task_bc.exec_ub;
+	ub->ub_dio.cancelled_write_bytes += bytes;
+}
+
+static inline void ub_io_accounting_init(struct user_beancounter *ub)
+{
+	memset(&ub->ub_dio, 0, sizeof(ub->ub_dio));
+}
+#else /* USER_RESOURCE */
+static inline void ub_io_account_read(struct task_struct *tsk, size_t bytes)
+{
+}
+
+static inline void ub_io_account_write_request(struct task_struct *tsk,
+		size_t bytes)
+{
+}
+
+static inline void ub_io_account_write_cancelled(struct task_struct *tsk,
+		size_t bytes)
+{
+}
+
+static inline void ub_io_accounting_init(struct user_beancounter *ub)
+{
+}
+#endif
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/io_acct.h linux-2.6.16.46-0.12-027test011/include/ub/io_acct.h
--- linux-2.6.16.46-0.12.orig/include/ub/io_acct.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/io_acct.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,103 @@
+/*
+ *  include/ub/io_acct.h
+ *
+ *  Copyright (C) 2006 SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  Pavel Emelianov <xemul@openvz.org>
+ *
+ */
+
+#ifndef __UB_IO_ACCT_H_
+#define __UB_IO_ACCT_H_
+
+#ifdef CONFIG_UBC_IO_ACCT
+#include <ub/beancounter.h>
+#include <ub/ub_page.h>
+
+#define page_iopb(page)	({			\
+		struct page_beancounter *pb;	\
+		pb = page_pbc(page);		\
+		rmb();				\
+		pb;				\
+	})
+
+/*
+ * IO ub is required in task context only, so if exec_ub is set
+ * to NULL this means that the user doesn't need to charge some
+ * resources. Nevertheless, IO activity must be accounted, so we
+ * account it to the current task's beancounter.
+ */
+
+/* Top-level ancestor of get_exec_ub(), or of get_task_ub(current) if that is NULL. */
+static inline struct user_beancounter *get_io_ub(void)
+{
+	struct user_beancounter *acct_ub = get_exec_ub();
+
+	if (unlikely(acct_ub == NULL))
+		acct_ub = get_task_ub(current);
+
+	return top_beancounter(acct_ub);
+}
+
+extern struct page_beancounter **page_pblist(struct page *);
+
+extern void ub_io_save_context(struct page *, size_t);
+extern void ub_io_release_context(struct page *pg, size_t size);
+
+static inline void ub_io_account_read(size_t bytes)
+{
+	ub_percpu_add(get_io_ub(), bytes_read, bytes);
+}
+
+static inline void ub_io_account_write(size_t bytes)
+{
+	ub_percpu_add(get_io_ub(), bytes_wrote, bytes);
+}
+
+static inline void ub_io_account_dirty(struct page *page, size_t bytes)
+{
+	ub_io_save_context(page, bytes);
+}
+
+static inline void ub_io_account_write_cancelled(size_t bytes)
+{
+	ub_percpu_add(get_io_ub(), bytes_cancelled, bytes);
+}
+
+void ub_init_io(kmem_cache_t *);
+#else /* UBC_IO_ACCT */
+#define page_iopb(page)		(NULL)
+#define page_pblist(page)	(&page_pbc(page))
+
+static inline void ub_io_release_context(struct page *pg, size_t bytes)
+{
+}
+
+static inline void ub_io_account_dirty(struct page *p, size_t bytes)
+{
+}
+
+static inline void ub_io_account_read(size_t bytes)
+{
+}
+
+static inline void ub_io_account_write(size_t bytes)
+{
+}
+
+static inline void ub_io_account_write_cancelled(size_t bytes)
+{
+}
+
+static inline void ub_init_io(kmem_cache_t *pb_cachep) { }	/* no-op stub used when CONFIG_UBC_IO_ACCT is not defined */
+#endif
+
+#ifdef CONFIG_UBC_DEBUG_IO
+extern void ub_io_release_debug(struct page *pg);
+#else
+#define ub_io_release_debug(pg)	do { } while (0)
+#endif
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/proc.h linux-2.6.16.46-0.12-027test011/include/ub/proc.h
--- linux-2.6.16.46-0.12.orig/include/ub/proc.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/proc.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,40 @@
+/*
+ *  include/ub/proc.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_PROC_H_
+#define __UB_PROC_H_
+
+#include <linux/seq_file.h>
+
+struct bc_proc_entry {
+	char *name;
+	union {
+		int (*show)(struct seq_file *, void *);
+		struct file_operations *fops;
+	} u;
+	struct bc_proc_entry *next;
+	int cookie;
+};
+
+struct user_beancounter;
+
+void bc_register_proc_entry(struct bc_proc_entry *);
+void bc_register_proc_root_entry(struct bc_proc_entry *);
+
+static inline struct user_beancounter *seq_beancounter(struct seq_file *f)
+{
+	return (struct user_beancounter *)(f->private);
+}
+
+extern const char *bc_proc_lu_fmt;
+extern const char *bc_proc_lu_lfmt;
+extern const char *bc_proc_llu_fmt;
+extern const char *bc_proc_lu_lu_fmt;
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_dcache.h linux-2.6.16.46-0.12-027test011/include/ub/ub_dcache.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_dcache.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_dcache.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,49 @@
+/*
+ *  include/ub/ub_dcache.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DCACHE_H_
+#define __UB_DCACHE_H_
+
+#include <ub/ub_decl.h>
+
+/*
+ * UB_DCACHESIZE accounting
+ */
+
+struct dentry_beancounter
+{
+	/*
+	 *  d_inuse =
+	 *         <number of external refs> +
+	 *         <number of 'used' children>
+	 *
+	 * d_inuse == -1 means that dentry is unused
+	 * state change -1 => 0 causes charge
+	 * state change 0 => -1 causes uncharge
+	 */
+	atomic_t d_inuse;
+	/* charged size, including name length if name is not inline */
+	unsigned long d_ubsize;
+	struct user_beancounter *d_ub;
+};
+
+#ifdef CONFIG_USER_RESOURCE
+#define ub_dget_testone(d)  (atomic_inc_and_test(&(d)->dentry_bc.d_inuse))
+#define ub_dput_testzero(d) (atomic_add_negative(-1, &(d)->dentry_bc.d_inuse))
+#define INUSE_INIT		0
+
+extern int ub_dentry_on;
+extern void ub_dentry_checkup(void);
+#else
+#define ub_dget_testone(d)	(0)
+#define ub_dput_testzero(d)	(0)
+#define ub_dentry_checkup()	do { } while (0)
+#endif
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_dcache_op.h linux-2.6.16.46-0.12-027test011/include/ub/ub_dcache_op.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_dcache_op.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_dcache_op.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,100 @@
+/*
+ *  include/ub/ub_dcache_op.h
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DCACHE_OP_H_
+#define __UB_DCACHE_OP_H_
+
+struct dentry;
+
+#ifdef CONFIG_USER_RESOURCE
+
+#include <linux/spinlock.h>
+#include <ub/ub_dcache.h>
+#include <ub/ub_task.h>
+
+extern int ub_dentry_alloc_barrier;
+extern spinlock_t dcache_lock;
+
+/*
+ * Returns 0 while ub_dentry_on is zero, else the result of __ub_dentry_alloc(d).
+ */
+static inline int ub_dentry_alloc(struct dentry *d)
+{
+	extern int __ub_dentry_alloc(struct dentry *);
+	return ub_dentry_on ? __ub_dentry_alloc(d) : 0;
+}
+
+/* Calls __ub_dentry_alloc_start() only while ub_dentry_alloc_barrier is nonzero. */
+static inline void ub_dentry_alloc_start(void)
+{
+	extern void __ub_dentry_alloc_start(void);
+	if (ub_dentry_alloc_barrier != 0)
+		__ub_dentry_alloc_start();
+}
+
+/* Calls __ub_dentry_alloc_end() only if current's task_bc.dentry_alloc is set. */
+static inline void ub_dentry_alloc_end(void)
+{
+	extern void __ub_dentry_alloc_end(void);
+	if (current->task_bc.dentry_alloc != 0)
+		__ub_dentry_alloc_end();
+}
+
+/* Forwards to __ub_dentry_charge(d) when ub_dentry_on is set; otherwise returns 0. */
+static inline int ub_dentry_charge(struct dentry *d)
+{
+	extern int __ub_dentry_charge(struct dentry *);
+	if (ub_dentry_on)
+		return __ub_dentry_charge(d);
+	return 0;
+}
+
+/* No-op unless ub_dentry_on is set; then hands d to __ub_dentry_charge_nofail(). */
+static inline void ub_dentry_charge_nofail(struct dentry *d)
+{
+	extern void __ub_dentry_charge_nofail(struct dentry *);
+
+	if (ub_dentry_on)
+		__ub_dentry_charge_nofail(d);
+}
+
+/* Calls __ub_dentry_uncharge(d) without taking dcache_lock (presumably held by the caller -- confirm). */
+static inline void ub_dentry_uncharge_locked(struct dentry *d)
+{
+	extern void __ub_dentry_uncharge(struct dentry *);
+
+	if (ub_dentry_on)
+		__ub_dentry_uncharge(d);
+}
+
+/* When ub_dentry_on is set, calls __ub_dentry_uncharge(d) under dcache_lock. */
+static inline void ub_dentry_uncharge(struct dentry *d)
+{
+	extern void __ub_dentry_uncharge(struct dentry *);
+	if (ub_dentry_on) {
+		spin_lock(&dcache_lock);
+		__ub_dentry_uncharge(d);
+		spin_unlock(&dcache_lock);
+	}
+}
+
+#else /* CONFIG_USER_RESOURCE */
+
+static inline int ub_dentry_alloc(struct dentry *d) { return 0; }
+static inline void ub_dentry_alloc_start(void) { }
+static inline void ub_dentry_alloc_end(void) { }
+static inline int ub_dentry_charge(struct dentry *d) { return 0; }
+static inline void ub_dentry_charge_nofail(struct dentry *d) { }
+static inline void ub_dentry_uncharge_locked(struct dentry *d) { }
+static inline void ub_dentry_uncharge(struct dentry *d) { }
+
+#endif /* CONFIG_USER_RESOURCE */
+
+#endif /* __UB_DCACHE_OP_H_ */
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_debug.h linux-2.6.16.46-0.12-027test011/include/ub/ub_debug.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_debug.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_debug.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,112 @@
+/*
+ *  include/ub/ub_debug.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DEBUG_H_
+#define __UB_DEBUG_H_
+
+/*
+ * general debugging
+ */
+
+#define UBD_ALLOC	0x1
+#define UBD_CHARGE	0x2
+#define UBD_LIMIT	0x4
+#define UBD_TRACE	0x8
+
+/*
+ * ub_net debugging
+ */
+
+#define UBD_NET_SOCKET	0x10
+#define UBD_NET_SLEEP	0x20
+#define UBD_NET_SEND	0x40
+#define UBD_NET_RECV	0x80
+
+/*
+ * Main routines
+ */
+
+#define UB_DEBUG (0)
+#define DEBUG_RESOURCE (0ULL)
+
+#define ub_dbg_cond(__cond, __str, args...)				\
+	do { 								\
+		if ((__cond) != 0)					\
+			printk(__str, ##args);				\
+	} while(0)
+
+#define ub_debug(__section, __str, args...) 				\
+	ub_dbg_cond(UB_DEBUG & (__section), __str, ##args)
+
+#define ub_debug_resource(__resource, __str, args...)			\
+	ub_dbg_cond((UB_DEBUG & UBD_CHARGE) &&				\
+			(DEBUG_RESOURCE & (1ULL << (__resource))), /* ULL: DEBUG_RESOURCE is a 64-bit mask */ \
+			__str, ##args)
+
+#if UB_DEBUG & UBD_TRACE
+#define ub_debug_trace(__cond, __b, __r)				\
+		do {							\
+			static struct ub_rate_info ri =	{ __b, __r }; /* burst, interval */ \
+			if ((__cond) != 0 && ub_ratelimit(&ri))		\
+				dump_stack();				\
+		} while (0)
+#else
+#define ub_debug_trace(__cond, __burst, __rate)	do { } while (0)	/* still one statement; arguments are not evaluated */
+#endif
+
+#include <linux/config.h>
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+#include <linux/list.h>
+#include <linux/kmem_cache.h>
+
+struct user_beancounter;
+struct ub_cache_counter {
+	struct list_head ulist;
+	struct ub_cache_counter *next;
+	struct user_beancounter *ub;
+	kmem_cache_t *cachep;
+	unsigned long counter;
+};
+
+extern spinlock_t cc_lock;
+extern void init_cache_counters(void);
+extern void ub_free_counters(struct user_beancounter *);
+extern void ub_kmemcache_free(kmem_cache_t *cachep);
+
+struct vm_struct;
+#define inc_vmalloc_charged(vm, flags)	do {				\
+		if ((flags) & __GFP_UBC) /* only if __GFP_UBC set */	\
+			ub_percpu_add(get_exec_ub(), vmalloc_charged,	\
+					(vm)->nr_pages);		\
+	} while (0)
+#define dec_vmalloc_charged(vm)		do {				\
+		struct user_beancounter *__ub; /* avoids capturing a caller's "ub" */ \
+		__ub = page_ub((vm)->pages[0]);				\
+		if (__ub != NULL) /* skip when pages[0] has no ub */	\
+			ub_percpu_sub(__ub, vmalloc_charged,		\
+					(vm)->nr_pages);		\
+	} while (0)
+
+#define inc_pbc_count(ub)	ub_percpu_inc(ub, pbcs)
+#define dec_pbc_count(ub)	ub_percpu_dec(ub, pbcs)
+#else
+#define init_cache_counters()		do { } while (0)
+#define inc_vmalloc_charged(vm, f)	do { } while (0)
+#define dec_vmalloc_charged(vm)		do { } while (0)
+
+#define inc_pbc_count(ub)		do { } while (0)
+#define dec_pbc_count(ub)		do { } while (0)
+
+#define ub_free_counters(ub)		do { } while (0)
+#define ub_kmemcache_free(cachep)	do { } while (0)
+#endif
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_decl.h linux-2.6.16.46-0.12-027test011/include/ub/ub_decl.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_decl.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_decl.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,42 @@
+/*
+ *  include/ub/ub_decl.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_DECL_H_
+#define __UB_DECL_H_
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+
+/*
+ * Naming convention:
+ * ub_<section|object>_<operation>
+ */
+
+#ifdef CONFIG_USER_RESOURCE
+
+#define UB_DECLARE_FUNC(ret_type, decl)	extern ret_type decl;
+#define UB_DECLARE_VOID_FUNC(decl)	extern void decl;
+
+#else /* CONFIG_USER_RESOURCE */
+
+#define UB_DECLARE_FUNC(ret_type, decl)		\
+	static inline ret_type decl		\
+	{					\
+		return (ret_type)0;		\
+	}
+#define UB_DECLARE_VOID_FUNC(decl)		\
+	static inline void decl			\
+	{					\
+	}
+
+#endif /* CONFIG_USER_RESOURCE */
+#endif
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_hash.h linux-2.6.16.46-0.12-027test011/include/ub/ub_hash.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_hash.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_hash.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,36 @@
+/*
+ *  include/ub/ub_hash.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef _LINUX_UBHASH_H
+#define _LINUX_UBHASH_H
+
+#ifdef __KERNEL__
+
+#define UB_HASH_SIZE 256
+
+extern struct hlist_head ub_hash[];
+extern spinlock_t ub_hash_lock;
+extern struct list_head ub_list_head;
+
+#ifdef CONFIG_USER_RESOURCE
+
+/*
+ * Iterate over beancounters
+ * @__ubp - beancounter ptr
+ * Can use break :)
+ */
+#define for_each_beancounter(__ubp)				\
+	list_for_each_entry_rcu(__ubp, &ub_list_head, ub_list)
+
+#define bc_hash_entry(ptr) hlist_entry(ptr, struct user_beancounter, ub_hash)
+
+#endif /* CONFIG_USER_RESOURCE */
+#endif /* __KERNEL__ */
+#endif /* _LINUX_UBHASH_H */
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_mem.h linux-2.6.16.46-0.12-027test011/include/ub/ub_mem.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_mem.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_mem.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,78 @@
+/*
+ *  include/ub/ub_mem.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_SLAB_H_
+#define __UB_SLAB_H_
+
+#include <linux/config.h>
+#include <linux/kmem_slab.h>
+#include <ub/beancounter.h>
+#include <ub/ub_decl.h>
+
+/*
+ * UB_KMEMSIZE accounting
+ */
+
+#ifdef CONFIG_UBC_DEBUG_ITEMS
+#define CHARGE_ORDER(__o)		(1 << (__o))
+#define CHARGE_SIZE(__s)		1
+#else
+#define CHARGE_ORDER(__o)		(PAGE_SIZE << (__o))
+#define CHARGE_SIZE(__s)		(__s)
+#endif
+
+#define page_ub(__page)	((__page)->bc.page_ub)
+
+struct mm_struct;
+struct page;
+struct kmem_cache;
+
+UB_DECLARE_FUNC(struct user_beancounter *, slab_ub(void *obj))
+UB_DECLARE_FUNC(struct user_beancounter *, vmalloc_ub(void *obj))
+UB_DECLARE_FUNC(struct user_beancounter *, mem_ub(void *obj))
+
+UB_DECLARE_FUNC(int, ub_page_charge(struct page *page, int order, gfp_t mask))
+UB_DECLARE_VOID_FUNC(ub_page_uncharge(struct page *page, int order))
+UB_DECLARE_FUNC(int, ub_slab_charge(struct kmem_cache *cachep,
+			void *objp, gfp_t flags))
+UB_DECLARE_VOID_FUNC(ub_slab_uncharge(struct kmem_cache *cachep, void *obj))
+
+#define slab_ubcs(cachep, slabp) ((struct user_beancounter **)\
+		(ALIGN((unsigned long)(slab_bufctl(slabp) + (cachep)->num),\
+		       sizeof(void *))))
+
+#ifdef CONFIG_USER_RESOURCE
+extern struct user_beancounter *ub_select_worst(long *);
+
+/* mm/slab.c needed stuff */
+#define UB_ALIGN(flags)		((flags) & SLAB_UBC ? sizeof(void *) : 1)
+#define UB_EXTRA(flags)		((flags) & SLAB_UBC ? sizeof(void *) : 0)
+#define set_cache_objuse(cachep)	do {				\
+		(cachep)->objuse = ((PAGE_SIZE << (cachep)->gfporder) +	\
+				(cachep)->num - 1) / (cachep)->num;	\
+		if (!OFF_SLAB(cachep))					\
+			break;						\
+		(cachep)->objuse += ((cachep)->slabp_cache->objuse +	\
+				(cachep)->num - 1) / (cachep)->num;	\
+	} while (0)
+#define init_slab_ubps(cachep, slabp)	do {				\
+		if (!((cachep)->flags & SLAB_UBC))			\
+			break;						\
+		memset(slab_ubcs(cachep, slabp), 0,			\
+				(cachep)->num * sizeof(void *));	\
+	} while (0)
+#define kmem_obj_memusage(o)	(virt_to_cache(o)->objuse)
+#else
+#define UB_ALIGN(flags)		1
+#define UB_EXTRA(flags)		0
+#define set_cache_objuse(c)	do { } while (0)
+#define init_slab_ubps(c, s)	do { } while (0)
+#endif
+#endif /* __UB_SLAB_H_ */
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_misc.h linux-2.6.16.46-0.12-027test011/include/ub/ub_misc.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_misc.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_misc.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,55 @@
+/*
+ *  include/ub/ub_misc.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_MISC_H_
+#define __UB_MISC_H_
+
+#include <ub/ub_decl.h>
+
+struct tty_struct;
+struct file;
+struct file_lock;
+struct sigqueue;
+
+UB_DECLARE_FUNC(int, ub_file_charge(struct file *f))
+UB_DECLARE_VOID_FUNC(ub_file_uncharge(struct file *f))
+UB_DECLARE_FUNC(int, ub_flock_charge(struct file_lock *fl, int hard))
+UB_DECLARE_VOID_FUNC(ub_flock_uncharge(struct file_lock *fl))
+UB_DECLARE_FUNC(int, ub_siginfo_charge(struct sigqueue *q,
+			struct user_beancounter *ub))
+UB_DECLARE_VOID_FUNC(ub_siginfo_uncharge(struct sigqueue *q))
+UB_DECLARE_FUNC(int, ub_task_charge(struct task_struct *parent,
+			struct task_struct *task))
+UB_DECLARE_VOID_FUNC(ub_task_uncharge(struct task_struct *task))
+UB_DECLARE_VOID_FUNC(ub_task_put(struct task_struct *task))
+UB_DECLARE_FUNC(int, ub_pty_charge(struct tty_struct *tty))
+UB_DECLARE_VOID_FUNC(ub_pty_uncharge(struct tty_struct *tty))
+
+#ifdef CONFIG_USER_RESOURCE
+#define set_flock_charged(fl)	do { (fl)->fl_charged = 1; } while (0)
+#define unset_flock_charged(fl)	do {		\
+		WARN_ON((fl)->fl_charged == 0);	\
+		(fl)->fl_charged = 0;		\
+	} while (0)
+#define set_mm_ub(mm, tsk)	do {				\
+		(mm)->mm_ub = get_beancounter((tsk) ?		\
+			(tsk)->task_bc.task_ub : get_exec_ub());	\
+	} while (0)
+#define put_mm_ub(mm)		do {				\
+		put_beancounter((mm)->mm_ub);			\
+		(mm)->mm_ub = NULL;				\
+	} while (0)
+#else
+#define set_flock_charged(fl)	do { } while (0)
+#define unset_flock_charged(fl)	do { } while (0)
+#define set_mm_ub(mm, tsk)	do { } while (0)
+#define put_mm_ub(mm)		do { } while (0)
+#endif
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_net.h linux-2.6.16.46-0.12-027test011/include/ub/ub_net.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_net.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_net.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,215 @@
+/*
+ *  include/ub/ub_net.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_NET_H_
+#define __UB_NET_H_
+
+/*
+ * UB_NUMXXXSOCK, UB_XXXBUF accounting
+ */
+
+#include <ub/ub_decl.h>
+#include <ub/ub_sk.h>
+#include <ub/beancounter.h>
+
+#define bid2sid(__bufid) \
+	((__bufid) == UB_TCPSNDBUF ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK)
+
+#define SOCK_MIN_UBCSPACE ((int)((2048 - sizeof(struct skb_shared_info)) & \
+			~(SMP_CACHE_BYTES-1)))
+#define SOCK_MIN_UBCSPACE_CH skb_charge_size(SOCK_MIN_UBCSPACE)
+
+static inline int ub_skb_alloc_bc(struct sk_buff *skb, gfp_t gfp_mask)
+{
+#ifdef CONFIG_USER_RESOURCE
+	memset(skb_bc(skb), 0, sizeof(struct skb_beancounter));
+#endif
+	return 0;
+}
+
+static inline void ub_skb_free_bc(struct sk_buff *skb)
+{
+}
+
+#define IS_TCP_SOCK(__family, __type) \
+		(((__family) == PF_INET || (__family) == PF_INET6) && (__type) == SOCK_STREAM)
+
+/* number of sockets */
+UB_DECLARE_FUNC(int, ub_sock_charge(struct sock *sk, int family, int type))
+UB_DECLARE_FUNC(int, ub_tcp_sock_charge(struct sock *sk))
+UB_DECLARE_FUNC(int, ub_other_sock_charge(struct sock *sk))
+UB_DECLARE_VOID_FUNC(ub_sock_uncharge(struct sock *sk))
+
+/* management of queue for send space */
+UB_DECLARE_FUNC(long, ub_sock_wait_for_space(struct sock *sk, long timeo, 
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_sock_snd_queue_add(struct sock *sk, int resource,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_sock_sndqueuedel(struct sock *sk))
+
+/* send space */
+UB_DECLARE_FUNC(int, ub_sock_make_wreserv(struct sock *sk, int bufid,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_sock_get_wreserv(struct sock *sk, int bufid,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_sock_ret_wreserv(struct sock *sk, int bufid,
+			unsigned long size, unsigned long ressize))
+UB_DECLARE_FUNC(int, ub_sock_tcp_chargesend(struct sock *sk,
+			struct sk_buff *skb, enum ub_severity strict))
+UB_DECLARE_VOID_FUNC(ub_sock_tcp_unchargesend(struct sock *sk,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_sock_tcp_chargepage(struct sock *sk))
+UB_DECLARE_VOID_FUNC(ub_sock_tcp_detachpage(struct sock *sk))
+
+UB_DECLARE_FUNC(int, ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk))
+
+/* receive space */
+UB_DECLARE_FUNC(int, ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb))
+UB_DECLARE_FUNC(int, ub_sock_tcp_chargerecv(struct sock *sk,
+			struct sk_buff *skb, enum ub_severity strict))
+
+/* skb destructor */
+UB_DECLARE_VOID_FUNC(ub_skb_uncharge(struct sk_buff *skb))
+
+static inline int ub_sock_makewres_other(struct sock *sk, unsigned long size)
+{
+	return ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size);
+}
+
+static inline int ub_sock_makewres_tcp(struct sock *sk, unsigned long size)
+{
+	return ub_sock_make_wreserv(sk, UB_TCPSNDBUF, size);
+}
+
+UB_DECLARE_FUNC(int, ub_sock_getwres_other(struct sock *sk,
+			unsigned long size))
+
+static inline int ub_sock_getwres_tcp(struct sock *sk, unsigned long size)
+{
+	return ub_sock_get_wreserv(sk, UB_TCPSNDBUF, size);
+}
+
+UB_DECLARE_VOID_FUNC(ub_sock_retwres_other(struct sock *sk,
+			unsigned long size, unsigned long ressize))
+
+static inline void ub_sock_retwres_tcp(struct sock *sk, unsigned long size,
+		unsigned long ressize)
+{
+	ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, ressize);
+}
+
+static inline void ub_sock_sndqueueadd_other(struct sock *sk, unsigned long sz)
+{
+	ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, sz);
+}
+
+static inline void ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz)
+{
+	ub_sock_snd_queue_add(sk, UB_TCPSNDBUF, sz);
+}
+
+static inline int ub_tcpsndbuf_charge(struct sock *sk,
+		struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargesend(sk, skb, UB_HARD);
+}
+
+static inline int ub_tcpsndbuf_charge_forced(struct sock *sk,
+		struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargesend(sk, skb, UB_FORCE);
+}
+
+static inline int ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargerecv(sk, skb, UB_SOFT);
+}
+
+static inline int ub_tcprcvbuf_charge_forced(struct sock *sk,
+		struct sk_buff *skb)
+{
+	return ub_sock_tcp_chargerecv(sk, skb, UB_FORCE);
+}
+
+/* Charge size */
+static inline unsigned long skb_charge_datalen(unsigned long chargesize)
+{
+#ifdef CONFIG_USER_RESOURCE
+	unsigned long slabsize;
+
+	chargesize -= sizeof(struct sk_buff);
+	slabsize = 64;
+	do {
+		slabsize <<= 1;
+	} while (slabsize <= chargesize);
+
+	slabsize >>= 1;
+	return (slabsize - sizeof(struct skb_shared_info)) &
+		~(SMP_CACHE_BYTES-1);
+#else
+	return 0;
+#endif
+}
+
+static inline unsigned long skb_charge_size_gen(unsigned long size)
+{
+#ifdef CONFIG_USER_RESOURCE
+	unsigned long slabsize;	/* as wide as size; must not wrap to 0 */
+
+	size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
+	slabsize = 32; /* min size is 64 because of skb_shared_info */
+	do {
+		slabsize <<= 1;
+	} while (slabsize < size);
+
+	return slabsize + sizeof(struct sk_buff);
+#else
+	return 0;
+#endif
+
+}
+
+static inline unsigned long skb_charge_size_const(unsigned long size)
+{
+#ifdef CONFIG_USER_RESOURCE
+	unsigned int ret;
+	if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 64)
+		ret = 64 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 128)
+		ret = 128 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 256)
+		ret = 256 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 512)
+		ret = 512 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 1024)
+		ret = 1024 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 2048)
+		ret = 2048 + sizeof(struct sk_buff);
+	else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 4096)
+		ret = 4096 + sizeof(struct sk_buff);
+	else
+		ret = skb_charge_size_gen(size);
+	return ret;
+#else
+	return 0;
+#endif
+}
+
+
+#define skb_charge_size(__size)			\
+	(__builtin_constant_p(__size)	?	\
+	 skb_charge_size_const(__size)	:	\
+	 skb_charge_size_gen(__size))
+
+UB_DECLARE_FUNC(int, skb_charge_fullsize(struct sk_buff *skb))
+UB_DECLARE_VOID_FUNC(ub_skb_set_charge(struct sk_buff *skb,
+			struct sock *sk, unsigned long size, int res))
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_oom.h linux-2.6.16.46-0.12-027test011/include/ub/ub_oom.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_oom.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_oom.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,35 @@
+/*
+ * Include guard: without CONFIG_USER_RESOURCE the UB_DECLARE_* macros
+ * expand to static inline definitions, which must not be seen twice.
+ */
+#ifndef __UB_OOM_H_
+#define __UB_OOM_H_
+
+#include <ub/ub_decl.h>
+#include <ub/ub_task.h>
+
+UB_DECLARE_FUNC(int, ub_oom_lock(void))
+UB_DECLARE_FUNC(struct user_beancounter *, ub_oom_select_worst(void))
+UB_DECLARE_VOID_FUNC(ub_oom_mm_killed(struct user_beancounter *ub))
+UB_DECLARE_VOID_FUNC(ub_oom_unlock(void))
+UB_DECLARE_VOID_FUNC(ub_out_of_memory(struct user_beancounter *ub))
+UB_DECLARE_VOID_FUNC(ub_oom_task_dead(struct task_struct *tsk))
+UB_DECLARE_FUNC(int, ub_oom_task_skip(struct user_beancounter *ub,
+			struct task_struct *tsk))
+
+#ifdef CONFIG_USER_RESOURCE
+extern int oom_generation;
+extern int oom_kill_counter;
+#define ub_oom_start() do {						\
+		current->task_bc.oom_generation = oom_generation;	\
+	} while (0)
+#define ub_oom_task_killed(p) do { 					\
+		oom_kill_counter++;					\
+		wake_up_process(p);					\
+	} while (0)
+#else
+#define ub_oom_start()			do { } while (0)
+#define ub_oom_task_killed(p)		do { } while (0)
+#endif
+
+#endif /* __UB_OOM_H_ */
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_orphan.h linux-2.6.16.46-0.12-027test011/include/ub/ub_orphan.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_orphan.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_orphan.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,106 @@
+/*
+ *  include/ub/ub_orphan.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_ORPHAN_H_
+#define __UB_ORPHAN_H_
+
+#include <net/tcp.h>
+
+#include "ub/beancounter.h"
+#include "ub/ub_net.h"
+
+
+static inline atomic_t *__ub_get_orphan_count_ptr(struct sock *sk)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return &sock_bc(sk)->ub->ub_orphan_count;
+#endif
+	return sk->sk_prot->orphan_count;
+}
+
+static inline void ub_inc_orphan_count(struct sock *sk)
+{
+	atomic_inc(__ub_get_orphan_count_ptr(sk));
+}
+
+static inline void ub_dec_orphan_count(struct sock *sk)
+{
+	atomic_dec(__ub_get_orphan_count_ptr(sk));
+}
+
+static inline int ub_get_orphan_count(struct sock *sk)
+{
+	return atomic_read(__ub_get_orphan_count_ptr(sk));
+}
+
+extern int __ub_too_many_orphans(struct sock *sk, int count);
+static inline int ub_too_many_orphans(struct sock *sk, int count)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (__ub_too_many_orphans(sk, count))
+		return 1;
+#endif
+	return (ub_get_orphan_count(sk) > sysctl_tcp_max_orphans ||
+		(sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+		 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]));
+}
+
+#include <ub/ub_mem.h>
+#include <linux/kmem_cache.h>
+
+struct inet_timewait_sock;
+
+static inline void ub_timewait_mod(struct inet_timewait_sock *tw, int incdec)
+{
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *ub;
+
+	ub = slab_ub(tw);
+	if (ub != NULL)
+		ub->ub_tw_count += incdec;
+#endif
+}
+
+static inline int __ub_timewait_check(struct sock *sk)
+{
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *ub;
+	unsigned long mem_max, mem;
+	int tw_count;
+
+	ub = sock_bc(sk)->ub;
+	if (ub == NULL)
+		return 1;
+
+	tw_count = ub->ub_tw_count;
+	mem_max = sysctl_tcp_max_tw_kmem_fraction *
+		((ub->ub_parms[UB_KMEMSIZE].limit >> 10) + 1);
+	mem = tw_count * sk->sk_prot_creator->twsk_prot->twsk_slab->objuse;
+	return tw_count < sysctl_tcp_max_tw_buckets_ub && mem < mem_max;
+#else
+	return 1;
+#endif
+}
+
+#define ub_timewait_inc(tw, twdr) do {			\
+		if ((twdr)->ub_managed)			\
+			ub_timewait_mod(tw, 1);		\
+	} while (0)
+
+#define ub_timewait_dec(tw, twdr) do {			\
+		if ((twdr)->ub_managed)			\
+			ub_timewait_mod(tw, -1);	\
+	} while (0)
+
+#define ub_timewait_check(sk, twdr) ((!(twdr)->ub_managed) || \
+					__ub_timewait_check(sk))
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_page.h linux-2.6.16.46-0.12-027test011/include/ub/ub_page.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_page.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_page.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,59 @@
+/*
+ *  include/ub/ub_page.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_PAGE_H_
+#define __UB_PAGE_H_
+
+#include <linux/config.h>
+
+/*
+ * Page_beancounters
+ */
+
+struct page;
+struct user_beancounter;
+
+#define PB_MAGIC 0x62700001UL
+
+struct page_beancounter {
+	unsigned long pb_magic;
+	struct page *page;
+	struct user_beancounter *ub;
+	union {
+		struct page_beancounter *next_hash;
+		struct page_beancounter *page_pb_list;
+	};
+	union {
+		unsigned refcount;
+		unsigned io_debug;
+	};
+	union {
+		struct list_head page_list;
+		struct list_head io_list;
+	};
+};
+
+#define PB_REFCOUNT_BITS 24
+#define PB_SHIFT_GET(c) ((c) >> PB_REFCOUNT_BITS)
+#define PB_SHIFT_INC(c) ((c) += (1 << PB_REFCOUNT_BITS))
+#define PB_SHIFT_DEC(c) ((c) -= (1 << PB_REFCOUNT_BITS))
+#define PB_COUNT_GET(c) ((c) & ((1 << PB_REFCOUNT_BITS) - 1))
+#define PB_COUNT_INC(c) ((c)++)
+#define PB_COUNT_DEC(c) ((c)--)
+#define PB_REFCOUNT_MAKE(s, c) (((s) << PB_REFCOUNT_BITS) + (c))
+
+#define page_pbc(__page)        ((__page)->bc.page_pb)
+
+extern spinlock_t pb_lock;
+
+struct address_space;
+extern int is_shmem_mapping(struct address_space *);
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_sk.h linux-2.6.16.46-0.12-027test011/include/ub/ub_sk.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_sk.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_sk.h	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,48 @@
+/*
+ *  include/ub/ub_sk.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_SK_H_
+#define __UB_SK_H_
+
+#include <linux/config.h>
+#include <ub/ub_task.h>
+
+struct sock;
+struct sk_buff;
+
+struct skb_beancounter {
+	struct user_beancounter *ub;
+	unsigned long charged:27, resource:5;
+};
+
+struct sock_beancounter {
+	struct user_beancounter *ub;
+	/*
+	 * poll_reserv accounts space already charged for future sends.
+	 * It is required to make poll agree with sendmsg.
+	 * Additionally, it makes real charges (with taking bc spinlock)
+	 * in the send path rarer, speeding networking up.
+	 * For TCP (only): changes are protected by socket lock (not bc!)
+	 * For all proto: may be read without serialization in poll.
+	 */
+	unsigned long           poll_reserv;
+	unsigned long		forw_space;
+	/* fields below are protected by bc spinlock */
+	unsigned long           ub_waitspc;     /* space waiting for */
+	unsigned long           ub_wcharged;
+	struct list_head        ub_sock_list;
+};
+
+#define sock_bc(__sk)		(&(__sk)->sk_bc)
+#define skb_bc(__skb)		(&(__skb)->skb_bc)
+#define skbc_sock(__skbc)	(container_of(__skbc, struct sock, sk_bc))
+#define sock_has_ubc(__sk)	(sock_bc(__sk)->ub != NULL)
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_stat.h linux-2.6.16.46-0.12-027test011/include/ub/ub_stat.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_stat.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_stat.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,70 @@
+/*
+ *  include/ub/ub_stat.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_STAT_H_
+#define __UB_STAT_H_
+
+/* sys_ubstat commands list */
+#define UBSTAT_READ_ONE			0x010000
+#define UBSTAT_READ_ALL			0x020000
+#define UBSTAT_READ_FULL		0x030000
+#define UBSTAT_UBLIST			0x040000
+#define UBSTAT_UBPARMNUM		0x050000
+#define UBSTAT_GETTIME			0x060000
+
+#define UBSTAT_CMD(func)		((func) & 0xF0000)
+#define UBSTAT_PARMID(func)		((func) & 0x0FFFF)
+
+#define TIME_MAX_SEC		(LONG_MAX / HZ)
+#define TIME_MAX_JIF		(TIME_MAX_SEC * HZ)
+
+typedef unsigned long ubstattime_t;
+
+typedef struct {
+	ubstattime_t	start_time;
+	ubstattime_t	end_time;
+	ubstattime_t	cur_time;
+} ubgettime_t;
+
+typedef struct {
+	long		maxinterval;
+	int		signum;
+} ubnotifrq_t;
+
+typedef struct {
+	unsigned long	maxheld;
+	unsigned long	failcnt;
+} ubstatparm_t;
+
+typedef struct {
+	unsigned long	barrier;
+	unsigned long	limit;
+	unsigned long	held;
+	unsigned long	maxheld;
+	unsigned long	minheld;
+	unsigned long	failcnt;
+	unsigned long __unused1;
+	unsigned long __unused2;
+} ubstatparmf_t;
+
+typedef struct {
+	ubstattime_t	start_time;
+	ubstattime_t	end_time;
+	ubstatparmf_t	param[0];
+} ubstatfull_t;
+
+#ifdef __KERNEL__
+struct ub_stat_notify {
+	struct list_head	list;
+	struct task_struct	*task;
+	int			signum;
+};
+#endif
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_task.h linux-2.6.16.46-0.12-027test011/include/ub/ub_task.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_task.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_task.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,69 @@
+/*
+ *  include/ub/ub_task.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_TASK_H_
+#define __UB_TASK_H_
+
+struct user_beancounter;
+
+
+#ifdef CONFIG_USER_RESOURCE
+struct task_beancounter {
+	struct user_beancounter	*exec_ub;
+	struct user_beancounter *saved_ub;
+	struct user_beancounter	*task_ub;
+	struct user_beancounter *fork_sub;
+	unsigned long file_precharged, file_quant, file_count;
+	unsigned long kmem_precharged;
+	char dentry_alloc, pgfault_handle;
+	void *task_fnode, *task_freserv;
+	unsigned long oom_generation;
+	unsigned long task_data[4];
+	unsigned long pgfault_allot;
+};
+
+#define get_task_ub(__task)	((__task)->task_bc.task_ub)
+
+extern struct user_beancounter ub0;
+#define get_ub0()	(&ub0)
+
+#define ub_save_context(t)	do {				\
+		(t)->task_bc.saved_ub = (t)->task_bc.exec_ub;	\
+		(t)->task_bc.exec_ub = get_ub0();		\
+	} while (0)
+#define ub_restore_context(t)	do {				\
+		(t)->task_bc.exec_ub = (t)->task_bc.saved_ub;	\
+	} while (0)
+
+#define get_exec_ub()		(current->task_bc.exec_ub)
+#define set_exec_ub(__newub)		\
+({					\
+	struct user_beancounter *__old;	\
+	struct task_beancounter *__tbc;	\
+					\
+	__tbc = &current->task_bc;	\
+	__old = __tbc->exec_ub;		\
+	__tbc->exec_ub = (__newub);	\
+	__old;				\
+})
+
+void ub_init_task_bc(struct task_beancounter *);
+
+#else /* CONFIG_USER_RESOURCE */
+
+#define get_ub0()		(NULL)
+#define get_exec_ub()		(NULL)
+#define get_task_ub(task)	(NULL)
+#define set_exec_ub(__ub)	(NULL)
+#define ub_save_context(t)	do { } while (0)
+#define ub_restore_context(t)	do { } while (0)
+
+#endif /* CONFIG_USER_RESOURCE */
+#endif /* __UB_TASK_H_ */
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_tcp.h linux-2.6.16.46-0.12-027test011/include/ub/ub_tcp.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_tcp.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_tcp.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,76 @@
+/*
+ *  include/ub/ub_tcp.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_TCP_H_
+#define __UB_TCP_H_
+
+/*
+ * UB_NUMXXXSOCK, UB_XXXBUF accounting
+ */
+
+#include <ub/ub_sk.h>
+#include <ub/beancounter.h>
+
+static inline void ub_tcp_update_maxadvmss(struct sock *sk)
+{
+#ifdef CONFIG_USER_RESOURCE
+	if (!sock_has_ubc(sk))
+		return;
+	if (sock_bc(sk)->ub->ub_maxadvmss >= tcp_sk(sk)->advmss)
+		return;
+
+	sock_bc(sk)->ub->ub_maxadvmss =
+		skb_charge_size(MAX_HEADER + sizeof(struct iphdr)
+				+ sizeof(struct tcphdr)	+ tcp_sk(sk)->advmss);
+#endif
+}
+
+static inline int ub_tcp_rmem_allows_expand(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 0;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk)) {
+		struct user_beancounter *ub;
+
+		ub = sock_bc(sk)->ub;
+		if (ub->ub_rmem_pressure == UB_RMEM_EXPAND)
+			return 1;
+		if (ub->ub_rmem_pressure == UB_RMEM_SHRINK)
+			return 0;
+		return sk->sk_rcvbuf <= ub->ub_rmem_thres;
+	}
+#endif
+	return 1;
+}
+
+static inline int ub_tcp_memory_pressure(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return sock_bc(sk)->ub->ub_rmem_pressure != UB_RMEM_EXPAND;
+#endif
+	return 0;
+}
+
+static inline int ub_tcp_shrink_rcvbuf(struct sock *sk)
+{
+	if (tcp_memory_pressure)
+		return 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (sock_has_ubc(sk))
+		return sock_bc(sk)->ub->ub_rmem_pressure == UB_RMEM_SHRINK;
+#endif
+	return 0;
+}
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/include/ub/ub_vmpages.h linux-2.6.16.46-0.12-027test011/include/ub/ub_vmpages.h
--- linux-2.6.16.46-0.12.orig/include/ub/ub_vmpages.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/include/ub/ub_vmpages.h	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,153 @@
+/*
+ *  include/ub/ub_vmpages.h
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#ifndef __UB_PAGES_H_
+#define __UB_PAGES_H_
+
+#include <linux/linkage.h>
+#include <linux/config.h>
+#include <ub/beancounter.h>
+#include <ub/ub_decl.h>
+
+/*
+ * Check whether vma has private or copy-on-write mapping.
+ * Should match checks in ub_protected_charge().
+ */
+#define VM_UB_PRIVATE(__flags, __file)					\
+		( ((__flags) & VM_WRITE) ?				\
+			(__file) == NULL || !((__flags) & VM_SHARED) :	\
+			0						\
+		)
+
+/* Mprotect charging result */
+#define PRIVVM_ERROR		-1
+#define PRIVVM_NO_CHARGE	 0 /* UB_DECLARE_FUNC retval with ubc off */
+#define PRIVVM_TO_PRIVATE	 1
+#define PRIVVM_TO_SHARED	 2
+
+UB_DECLARE_FUNC(int, ub_protected_charge(struct mm_struct *mm,
+			unsigned long size,
+			unsigned long newflags,
+			struct vm_area_struct *vma))
+
+UB_DECLARE_VOID_FUNC(ub_unused_privvm_add(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			unsigned long num))
+#define ub_unused_privvm_inc(mm, vma)	ub_unused_privvm_add(mm, vma, 1)
+UB_DECLARE_VOID_FUNC(ub_unused_privvm_sub(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			unsigned long num))
+#define ub_unused_privvm_dec(mm, vma)	ub_unused_privvm_sub(mm, vma, 1)
+
+UB_DECLARE_VOID_FUNC(__ub_unused_privvm_dec(struct mm_struct *mm,
+			long sz))
+
+UB_DECLARE_FUNC(int, ub_memory_charge(struct mm_struct *mm,
+			unsigned long size,
+			unsigned vm_flags,
+			struct file *vm_file,
+			int strict))
+UB_DECLARE_VOID_FUNC(ub_memory_uncharge(struct mm_struct *mm,
+			unsigned long size,
+			unsigned vm_flags,
+			struct file *vm_file))
+
+struct shmem_inode_info;
+UB_DECLARE_FUNC(int, ub_shmpages_charge(struct shmem_inode_info *i,
+			unsigned long sz))
+UB_DECLARE_VOID_FUNC(ub_shmpages_uncharge(struct shmem_inode_info *i,
+			unsigned long sz))
+UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_inc(struct shmem_inode_info *shi))
+UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_sub(struct shmem_inode_info *shi,
+			unsigned long size))
+#define ub_tmpfs_respages_dec(shi)	ub_tmpfs_respages_sub(shi, 1)
+
+#ifdef CONFIG_USER_RESOURCE
+#define shmi_ub_set(shi, ub)	do {			\
+		(shi)->shmi_ub = get_beancounter(ub);	\
+	} while (0)
+#define shmi_ub_put(shi)	do {			\
+		put_beancounter((shi)->shmi_ub);	\
+		(shi)->shmi_ub = NULL;			\
+	} while (0)
+#else
+#define shmi_ub_set(shi, ub)	do { } while (0)
+#define shmi_ub_put(shi)	do { } while (0)
+#endif
+
+UB_DECLARE_FUNC(int, ub_locked_charge(struct mm_struct *mm,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_locked_uncharge(struct mm_struct *mm,
+			unsigned long size))
+UB_DECLARE_FUNC(int, ub_lockedshm_charge(struct shmem_inode_info *shi,
+			unsigned long size))
+UB_DECLARE_VOID_FUNC(ub_lockedshm_uncharge(struct shmem_inode_info *shi,
+			unsigned long size))
+
+UB_DECLARE_FUNC(unsigned long, pages_in_vma_range(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end))
+#define pages_in_vma(vma)	(pages_in_vma_range(vma, \
+			vma->vm_start, vma->vm_end))
+
+#define UB_PAGE_WEIGHT_SHIFT 24
+#define UB_PAGE_WEIGHT (1 << UB_PAGE_WEIGHT_SHIFT)
+
+struct page_beancounter;
+#define PBC_COPY_SAME	((struct page_beancounter *) 1)
+
+/* Mprotect charging result */
+#define PRIVVM_ERROR		-1
+#define PRIVVM_NO_CHARGE	0
+#define PRIVVM_TO_PRIVATE	1
+#define PRIVVM_TO_SHARED	2
+
+extern void fastcall __ub_update_physpages(struct user_beancounter *ub);
+extern void fastcall __ub_update_oomguarpages(struct user_beancounter *ub);
+extern void fastcall __ub_update_privvm(struct user_beancounter *ub);
+
+#ifdef CONFIG_USER_RSS_ACCOUNTING
+#define PB_DECLARE_FUNC(ret, decl)	UB_DECLARE_FUNC(ret, decl)
+#define PB_DECLARE_VOID_FUNC(decl)	UB_DECLARE_VOID_FUNC(decl)
+#else
+#define PB_DECLARE_FUNC(ret, decl)	static inline ret decl {return (ret)0;}
+#define PB_DECLARE_VOID_FUNC(decl)	static inline void decl { }
+#endif
+
+PB_DECLARE_FUNC(int, pb_alloc(struct page_beancounter **pbc))
+PB_DECLARE_FUNC(int, pb_alloc_list(struct page_beancounter **pbc, int num))
+PB_DECLARE_FUNC(int, pb_alloc_all(struct page_beancounter **pbc))
+PB_DECLARE_VOID_FUNC(pb_add_ref(struct page *page,
+			struct mm_struct *mm,
+			struct page_beancounter **pbc))
+PB_DECLARE_VOID_FUNC(pb_dup_ref(struct page *page,
+			struct mm_struct *mm,
+			struct page_beancounter **pbc))
+PB_DECLARE_VOID_FUNC(pb_free_list(struct page_beancounter **pb))
+PB_DECLARE_VOID_FUNC(pb_free(struct page_beancounter **pb))
+PB_DECLARE_VOID_FUNC(pb_remove_ref(struct page *page,
+			struct mm_struct *mm))
+
+PB_DECLARE_FUNC(struct user_beancounter *, pb_grab_page_ub(struct page *page))
+
+#ifdef CONFIG_USER_SWAP_ACCOUNTING
+#define SWP_DECLARE_FUNC(ret, decl)	UB_DECLARE_FUNC(ret, decl)
+#define SWP_DECLARE_VOID_FUNC(decl)	UB_DECLARE_VOID_FUNC(decl)
+#else
+#define SWP_DECLARE_FUNC(ret, decl)	static inline ret decl {return (ret)0;}
+#define SWP_DECLARE_VOID_FUNC(decl)	static inline void decl { }
+#endif
+
+struct swap_info_struct;
+SWP_DECLARE_FUNC(int, ub_swap_init(struct swap_info_struct *si, pgoff_t n))
+SWP_DECLARE_VOID_FUNC(ub_swap_fini(struct swap_info_struct *si))
+SWP_DECLARE_VOID_FUNC(ub_swapentry_inc(struct swap_info_struct *si, pgoff_t n,
+			struct user_beancounter *ub))
+SWP_DECLARE_VOID_FUNC(ub_swapentry_dec(struct swap_info_struct *si, pgoff_t n))
+#endif /* __UB_PAGES_H_ */
diff -upr linux-2.6.16.46-0.12.orig/init/calibrate.c linux-2.6.16.46-0.12-027test011/init/calibrate.c
--- linux-2.6.16.46-0.12.orig/init/calibrate.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/init/calibrate.c	2007-08-28 17:35:31.000000000 +0400
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/module.h>
 
 #include <asm/timex.h>
 
@@ -105,6 +106,60 @@ static unsigned long __devinit calibrate
 static unsigned long __devinit calibrate_delay_direct(void) {return 0;}
 #endif
 
+unsigned long cycles_per_jiffy, cycles_per_clock;
+
+static __devinit void calibrate_cycles(void)
+{
+	unsigned long ticks;
+	cycles_t time;
+
+	/* Time one whole jiffy in get_cycles() units, starting on a jiffy edge. */
+	for (ticks = jiffies; ticks == jiffies; )
+		/* nothing */;
+	time = get_cycles();
+	for (ticks = jiffies; ticks == jiffies; )
+		/* nothing */;
+
+	time = get_cycles() - time;
+	cycles_per_jiffy = time;
+	/* Equals "(time >> 32) != 0" but stays defined if cycles_t is 32 bits. */
+	if (time > 0xffffffffULL) {
+		printk(KERN_WARNING "CPU too fast! timings are incorrect\n");
+		cycles_per_jiffy = -1;
+	}
+}
+
+EXPORT_SYMBOL(cycles_per_jiffy);
+EXPORT_SYMBOL(cycles_per_clock);
+
+static __devinit void calc_cycles_per_jiffy(void)
+{
+	/* Fill in cycles_per_jiffy, then derive cycles_per_clock from it. */
+#if defined(__i386__)
+	extern unsigned long fast_gettimeoffset_quotient;
+	unsigned long low, high;
+
+	if (fast_gettimeoffset_quotient != 0) {
+		__asm__("divl %2"
+				:"=a" (low), "=d" (high)
+				:"r" (fast_gettimeoffset_quotient),
+				"0" (0), "1" (1000000/HZ));
+		/* EDX:EAX held (1000000/HZ):0; divl left the quotient in EAX (low) */
+		cycles_per_jiffy = low;
+	}
+#endif
+	if (cycles_per_jiffy == 0)
+		calibrate_cycles();
+
+	if (cycles_per_jiffy == 0) {
+		printk(KERN_WARNING "Cycles are stuck! Some statistics will not be available.\n");
+		/* to prevent division by zero in cycles_to_(clocks|jiffies) */
+		cycles_per_jiffy = 1;
+		cycles_per_clock = 1;
+	} else	/* NOTE(review): HZ / CLOCKS_PER_SEC is an integer division and truncates */
+		cycles_per_clock = cycles_per_jiffy * (HZ / CLOCKS_PER_SEC);
+}
+
 /*
  * This is the number of bits of precision for the loops_per_jiffy.  Each
  * bit takes on average 1.5/HZ seconds.  This (like the original) is a little
@@ -170,4 +225,5 @@ void __devinit calibrate_delay(void)
 			loops_per_jiffy);
 	}
 
+	calc_cycles_per_jiffy();
 }
diff -upr linux-2.6.16.46-0.12.orig/init/main.c linux-2.6.16.46-0.12-027test011/init/main.c
--- linux-2.6.16.46-0.12.orig/init/main.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/init/main.c	2007-08-28 17:35:34.000000000 +0400
@@ -50,6 +50,8 @@
 #include <linux/mempolicy.h>
 #include <linux/key.h>
 
+#include <ub/beancounter.h>
+
 #include <asm/io.h>
 #include <asm/bugs.h>
 #include <asm/setup.h>
@@ -86,6 +88,7 @@ extern void sbus_init(void);
 extern void sysctl_init(void);
 extern void signals_init(void);
 extern void buffer_init(void);
+extern void fairsched_init_late(void);
 extern void pidhash_init(void);
 extern void pidmap_init(void);
 extern void prio_tree_init(void);
@@ -110,6 +113,26 @@ extern void tc_init(void);
 enum system_states system_state;
 EXPORT_SYMBOL(system_state);
 
+#ifdef CONFIG_VE
+extern void init_ve_system(void);
+extern void init_ve0(void);
+extern void prepare_ve0_process(struct task_struct *tsk);
+extern void prepare_ve0_proc_root(void);
+extern void prepare_ve0_sysctl(void);
+#else	/* !CONFIG_VE: every hook compiles to an empty statement */
+#define init_ve_system()		do { } while (0)
+#define init_ve0()			do { } while (0)
+#define prepare_ve0_process(tsk)	do { } while (0)
+#define prepare_ve0_proc_root()		do { } while (0)
+#define prepare_ve0_sysctl()		do { } while (0)
+#endif	/* CONFIG_VE */
+
+#if defined(CONFIG_VE) && defined(CONFIG_NET)
+extern void prepare_ve0_loopback(void);
+#else	/* VE or networking compiled out: the hook is an empty statement */
+#define prepare_ve0_loopback()		do { } while (0)
+#endif	/* CONFIG_VE && CONFIG_NET */
+
 /*
  * The kernel_magic value represents the address of _end, which allows
  * namelist tools to "match" each other respectively.  That way a tool
@@ -500,6 +523,10 @@ asmlinkage void __init start_kernel(void
  * enable them
  */
 	lock_kernel();
+	/*
+	 * Prepare ub0 to account early allocations if any
+	 */
+	ub_init_ub0();
 	page_address_init();
 	printk(KERN_NOTICE);
 	printk(linux_banner);
@@ -513,6 +540,9 @@ asmlinkage void __init start_kernel(void
 	 */
 	smp_prepare_boot_cpu();
 
+	prepare_ve0_process(&init_task);
+	init_ve0();
+
 	/*
 	 * Set up the scheduler prior starting any interrupts (such as the
 	 * timer interrupt). Full topology setup happens at smp_init()
@@ -586,6 +616,7 @@ asmlinkage void __init start_kernel(void
 #endif
 	fork_init(num_physpages);
 	proc_caches_init();
+	ub_init_late();
 	buffer_init();
 	unnamed_dev_init();
 	key_init();
@@ -596,6 +627,8 @@ asmlinkage void __init start_kernel(void
 	/* rootfs populating might need page-writeback */
 	page_writeback_init();
 #ifdef CONFIG_PROC_FS
+	prepare_ve0_proc_root();
+	prepare_ve0_sysctl();
 	proc_root_init();
 #endif
 	cpuset_init();
@@ -604,6 +637,10 @@ asmlinkage void __init start_kernel(void
 
 	check_bugs();
 
+#ifdef CONFIG_USER_RSS_ACCOUNTING
+	ub_init_pbc();
+#endif
+
 	/* Do the rest non-__init'ed, we're now alive */
 	rest_init();
 }
@@ -665,6 +702,9 @@ static void __init do_initcalls(void)
  */
 static void __init do_basic_setup(void)
 {
+	prepare_ve0_loopback();
+	init_ve_system();
+
 	/* drivers will send hotplug events */
 	init_workqueues();
 	usermodehelper_init();
@@ -680,7 +720,7 @@ static void __init do_basic_setup(void)
 static void do_pre_smp_initcalls(void)
 {
 	extern int spawn_ksoftirqd(void);
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
 	extern int migration_init(void);
 
 	migration_init();
@@ -744,6 +784,12 @@ static int init(void * unused)
 
 	fixup_cpu_present_map();
 	smp_init();
+
+	/*
+	 * This should be done after all cpus are known to
+	 * be online.  smp_init gives us confidence in it.
+	 */
+	fairsched_init_late();
 	sched_init_smp();
 
 	cpuset_init_smp();
diff -upr linux-2.6.16.46-0.12.orig/init/version.c linux-2.6.16.46-0.12-027test011/init/version.c
--- linux-2.6.16.46-0.12.orig/init/version.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/init/version.c	2007-08-28 17:35:31.000000000 +0400
@@ -29,6 +29,12 @@ struct new_utsname system_utsname = {
 
 EXPORT_SYMBOL(system_utsname);
 
+struct new_utsname virt_utsname = {
+	/* only .release is initialised here; the other fields are left zeroed */
+	.release        = UTS_RELEASE,
+};
+EXPORT_SYMBOL(virt_utsname);
+
 const char linux_banner[] =
 	"Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 	LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
diff -upr linux-2.6.16.46-0.12.orig/ipc/msg.c linux-2.6.16.46-0.12-027test011/ipc/msg.c
--- linux-2.6.16.46-0.12.orig/ipc/msg.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/ipc/msg.c	2007-08-28 17:35:33.000000000 +0400
@@ -91,6 +91,45 @@ void __init msg_init (void)
 				sysvipc_msg_proc_show);
 }
 
+#ifdef CONFIG_VE
+void __init prepare_msg(void)
+{	/* copy this file's message-queue table pointer and limits into the VE0 structure */
+	get_ve0()->_msg_ids = &msg_ids;	/* msg_ids is not yet the per-VE macro #defined further down */
+	get_ve0()->_msg_ctlmax = msg_ctlmax;
+	get_ve0()->_msg_ctlmnb = msg_ctlmnb;
+	get_ve0()->_msg_ctlmni = msg_ctlmni;
+}
+
+#define msg_ids		(*(get_exec_env()->_msg_ids))
+#define msg_ctlmax	(get_exec_env()->_msg_ctlmax)
+#define msg_ctlmnb	(get_exec_env()->_msg_ctlmnb)
+#define msg_ctlmni	(get_exec_env()->_msg_ctlmni)
+
+void init_ve_ipc_msg(void)
+{	/* all four names below are macros that resolve through get_exec_env() */
+	msg_ctlmax = MSGMAX;
+	msg_ctlmnb = MSGMNB;
+	msg_ctlmni = MSGMNI;
+	ipc_init_ids(&msg_ids, MSGMNI);	/* i.e. the table get_exec_env()->_msg_ids points at */
+}
+
+void cleanup_ve_ipc_msg(void)
+{
+	struct msg_queue *queue;
+	int slot;
+
+	/* Free every message queue still registered in the current VE. */
+	down(&msg_ids.sem);
+	for (slot = 0; slot <= msg_ids.max_id; slot++) {
+		queue = msg_lock(slot);
+		/* freeque() is handed the locked queue; no unlock happens here */
+		if (queue != NULL)
+			freeque(queue, slot);
+	}
+	up(&msg_ids.sem);
+}
+#endif
+
 static int newque (key_t key, int msgflg)
 {
 	int id;
@@ -111,7 +150,7 @@ static int newque (key_t key, int msgflg
 		return retval;
 	}
 
-	id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni);
+	id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni, -1);
 	if(id == -1) {
 		security_msg_queue_free(msq);
 		ipc_rcu_putref(msq);
@@ -461,7 +500,7 @@ asmlinkage long sys_msgctl (int msqid, i
 
 	err = -EPERM;
 	if (current->euid != ipcp->cuid && 
-	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
+	    current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN))
 	    /* We _could_ check for CAP_CHOWN above, but we don't */
 		goto out_unlock_up;
 
@@ -551,7 +590,7 @@ static inline int pipelined_send(struct 
 				msr->r_msg = ERR_PTR(-E2BIG);
 			} else {
 				msr->r_msg = NULL;
-				msq->q_lrpid = msr->r_tsk->pid;
+				msq->q_lrpid = virt_pid(msr->r_tsk);
 				msq->q_rtime = get_seconds();
 				wake_up_process(msr->r_tsk);
 				smp_mb();
@@ -633,7 +672,7 @@ asmlinkage long sys_msgsnd (int msqid, s
 		}
 	}
 
-	msq->q_lspid = current->tgid;
+	msq->q_lspid = virt_tgid(current);
 	msq->q_stime = get_seconds();
 
 	if(!pipelined_send(msq,msg)) {
@@ -729,7 +768,7 @@ asmlinkage long sys_msgrcv (int msqid, s
 			list_del(&msg->m_list);
 			msq->q_qnum--;
 			msq->q_rtime = get_seconds();
-			msq->q_lrpid = current->tgid;
+			msq->q_lrpid = virt_tgid(current);
 			msq->q_cbytes -= msg->m_ts;
 			atomic_sub(msg->m_ts,&msg_bytes);
 			atomic_dec(&msg_hdrs);
@@ -844,3 +883,27 @@ static int sysvipc_msg_proc_show(struct 
 			  msq->q_ctime);
 }
 #endif
+
+#ifdef CONFIG_VE
+#include <linux/module.h>
+
+int sysvipc_walk_msg(int (*func)(int i, struct msg_queue*, void *), void *arg)
+{
+	struct msg_queue *queue;
+	int slot, rc = 0;
+
+	/* Call func on each queue of the current VE; first nonzero result ends it. */
+	down(&msg_ids.sem);
+	for (slot = 0; rc == 0 && slot <= msg_ids.max_id; slot++) {
+		queue = msg_lock(slot);
+		if (queue == NULL)
+			continue;
+		/* func gets the id built from slot and sequence, between lock and unlock */
+		rc = func(msg_buildid(slot, queue->q_perm.seq), queue, arg);
+		msg_unlock(queue);
+	}
+	up(&msg_ids.sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(sysvipc_walk_msg);
+#endif
diff -upr linux-2.6.16.46-0.12.orig/ipc/msgutil.c linux-2.6.16.46-0.12-027test011/ipc/msgutil.c
--- linux-2.6.16.46-0.12.orig/ipc/msgutil.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/ipc/msgutil.c	2007-08-28 17:35:30.000000000 +0400
@@ -17,6 +17,8 @@
 
 #include "util.h"
 
+#include <ub/ub_mem.h>
+
 struct msg_msgseg {
 	struct msg_msgseg* next;
 	/* the next part of the message follows immediately */
@@ -36,7 +38,7 @@ struct msg_msg *load_msg(const void __us
 	if (alen > DATALEN_MSG)
 		alen = DATALEN_MSG;
 
-	msg = (struct msg_msg *)kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
+	msg = (struct msg_msg *)ub_kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
 	if (msg == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -56,7 +58,7 @@ struct msg_msg *load_msg(const void __us
 		alen = len;
 		if (alen > DATALEN_SEG)
 			alen = DATALEN_SEG;
-		seg = (struct msg_msgseg *)kmalloc(sizeof(*seg) + alen,
+		seg = (struct msg_msgseg *)ub_kmalloc(sizeof(*seg) + alen,
 						 GFP_KERNEL);
 		if (seg == NULL) {
 			err = -ENOMEM;
diff -upr linux-2.6.16.46-0.12.orig/ipc/sem.c linux-2.6.16.46-0.12-027test011/ipc/sem.c
--- linux-2.6.16.46-0.12.orig/ipc/sem.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/ipc/sem.c	2007-08-28 17:35:33.000000000 +0400
@@ -81,6 +81,7 @@
 #include <asm/uaccess.h>
 #include "util.h"
 
+#include <ub/ub_mem.h>
 
 #define sem_lock(id)	((struct sem_array*)ipc_lock(&sem_ids,id))
 #define sem_unlock(sma)	ipc_unlock(&(sma)->sem_perm)
@@ -91,7 +92,7 @@
 	ipc_buildid(&sem_ids, id, seq)
 static struct ipc_ids sem_ids;
 
-static int newary (key_t, int, int);
+static int newary (key_t, int, int, int);
 static void freeary (struct sem_array *sma, int id);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
@@ -127,6 +128,48 @@ void __init sem_init (void)
 				sysvipc_sem_proc_show);
 }
 
+#ifdef CONFIG_VE
+void __init prepare_sem(void)
+{
+	int i;
+
+	get_ve0()->_sem_ids = &sem_ids;	/* still the static sem_ids, not the later macro */
+	get_ve0()->_used_sems = used_sems;
+	for (i = 0; i < 4; i++)		/* slots 0..3 of sem_ctls */
+		get_ve0()->_sem_ctls[i] = sem_ctls[i];
+}
+
+#define sem_ids		(*(get_exec_env()->_sem_ids))
+#define used_sems	(get_exec_env()->_used_sems)
+#define sem_ctls	(get_exec_env()->_sem_ctls)
+
+void init_ve_ipc_sem(void)
+{	/* used_sems, sem_ctls and sem_ids are macros that resolve through get_exec_env() */
+	used_sems = 0;
+	sem_ctls[0] = SEMMSL;
+	sem_ctls[1] = SEMMNS;
+	sem_ctls[2] = SEMOPM;
+	sem_ctls[3] = SEMMNI;
+	ipc_init_ids(&sem_ids, SEMMNI);	/* i.e. the table get_exec_env()->_sem_ids points at */
+}
+
+void cleanup_ve_ipc_sem(void)
+{
+	struct sem_array *ary;
+	int slot;
+
+	/* Free every semaphore set still registered in the current VE. */
+	down(&sem_ids.sem);
+	for (slot = 0; slot <= sem_ids.max_id; slot++) {
+		ary = sem_lock(slot);
+		/* freeary() is handed the locked set; no unlock happens here */
+		if (ary != NULL)
+			freeary(ary, slot);
+	}
+	up(&sem_ids.sem);
+}
+#endif
+
 /*
  * Lockless wakeup algorithm:
  * Without the check/retry algorithm a lockless wakeup is possible:
@@ -161,7 +204,7 @@ void __init sem_init (void)
  */
 #define IN_WAKEUP	1
 
-static int newary (key_t key, int nsems, int semflg)
+static int newary (key_t key, int semid, int nsems, int semflg)
 {
 	int id;
 	int retval;
@@ -190,7 +233,7 @@ static int newary (key_t key, int nsems,
 		return retval;
 	}
 
-	id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
+	id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni, semid);
 	if(id == -1) {
 		security_sem_free(sma);
 		ipc_rcu_putref(sma);
@@ -220,12 +263,12 @@ asmlinkage long sys_semget (key_t key, i
 	down(&sem_ids.sem);
 	
 	if (key == IPC_PRIVATE) {
-		err = newary(key, nsems, semflg);
+		err = newary(key, -1, nsems, semflg);
 	} else if ((id = ipc_findkey(&sem_ids, key)) == -1) {  /* key not used */
 		if (!(semflg & IPC_CREAT))
 			err = -ENOENT;
 		else
-			err = newary(key, nsems, semflg);
+			err = newary(key, -1, nsems, semflg);
 	} else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
 		err = -EEXIST;
 	} else {
@@ -746,7 +789,7 @@ static int semctl_main(int semid, int se
 		for (un = sma->undo; un; un = un->id_next)
 			un->semadj[semnum] = 0;
 		curr->semval = val;
-		curr->sempid = current->tgid;
+		curr->sempid = virt_tgid(current);
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
@@ -833,7 +876,7 @@ static int semctl_down(int semid, int se
 			goto out_unlock;
 	}
 	if (current->euid != ipcp->cuid && 
-	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
+	    current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN)) {
 	    	err=-EPERM;
 		goto out_unlock;
 	}
@@ -954,7 +997,8 @@ static inline int get_undo_list(struct s
 	undo_list = current->sysvsem.undo_list;
 	if (!undo_list) {
 		size = sizeof(struct sem_undo_list);
-		undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
+		undo_list = (struct sem_undo_list *) ub_kmalloc(size,
+				GFP_KERNEL);
 		if (undo_list == NULL)
 			return -ENOMEM;
 		memset(undo_list, 0, size);
@@ -1018,7 +1062,8 @@ static struct sem_undo *find_undo(int se
 	ipc_rcu_getref(sma);
 	sem_unlock(sma);
 
-	new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+	new = (struct sem_undo *) ub_kmalloc(sizeof(struct sem_undo) +
+			sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
 		ipc_lock_by_ptr(&sma->sem_perm);
 		ipc_rcu_putref(sma);
@@ -1076,7 +1121,7 @@ asmlinkage long sys_semtimedop(int semid
 	if (nsops > sc_semopm)
 		return -E2BIG;
 	if(nsops > SEMOPM_FAST) {
-		sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
+		sops = ub_kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
 		if(sops==NULL)
 			return -ENOMEM;
 	}
@@ -1145,7 +1190,7 @@ retry_undos:
 	if (error)
 		goto out_unlock_free;
 
-	error = try_atomic_semop (sma, sops, nsops, un, current->tgid);
+	error = try_atomic_semop (sma, sops, nsops, un, virt_tgid(current));
 	if (error <= 0) {
 		if (alter && error == 0)
 			update_queue (sma);
@@ -1160,7 +1205,7 @@ retry_undos:
 	queue.sops = sops;
 	queue.nsops = nsops;
 	queue.undo = un;
-	queue.pid = current->tgid;
+	queue.pid = virt_tgid(current);
 	queue.id = semid;
 	queue.alter = alter;
 	if (alter)
@@ -1330,7 +1375,7 @@ found:
 					sem->semval = 0;
 				if (sem->semval > SEMVMX)
 					sem->semval = SEMVMX;
-				sem->sempid = current->tgid;
+				sem->sempid = virt_tgid(current);
 			}
 		}
 		sma->sem_otime = get_seconds();
@@ -1361,3 +1406,48 @@ static int sysvipc_sem_proc_show(struct 
 			  sma->sem_ctime);
 }
 #endif
+
+#ifdef CONFIG_VE
+#include <linux/module.h>
+
+int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg)
+{
+	struct sem_array *ary;
+	int rc = 0;
+
+	/* Create semaphore set semid via newary() unless sem_lock() already finds it. */
+	down(&sem_ids.sem);
+	ary = sem_lock(semid);
+	if (ary == NULL) {
+		rc = newary(key, semid, size, semflg);
+		ary = (rc >= 0) ? sem_lock(semid) : NULL;
+	}
+	if (ary != NULL)
+		sem_unlock(ary);
+	up(&sem_ids.sem);
+
+	return rc > 0 ? 0 : rc;	/* a positive newary() result collapses to 0 */
+}
+EXPORT_SYMBOL_GPL(sysvipc_setup_sem);
+
+int sysvipc_walk_sem(int (*func)(int i, struct sem_array*, void *), void *arg)
+{
+	struct sem_array *ary;
+	int slot, rc = 0;
+
+	/* Call func on each set of the current VE; first nonzero result ends it. */
+	down(&sem_ids.sem);
+	for (slot = 0; rc == 0 && slot <= sem_ids.max_id; slot++) {
+		ary = sem_lock(slot);
+		if (ary == NULL)
+			continue;
+		/* func gets the id built from slot and sequence, between lock and unlock */
+		rc = func(sem_buildid(slot, ary->sem_perm.seq), ary, arg);
+		sem_unlock(ary);
+	}
+	up(&sem_ids.sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(sysvipc_walk_sem);
+EXPORT_SYMBOL_GPL(exit_sem);
+#endif
diff -upr linux-2.6.16.46-0.12.orig/ipc/shm.c linux-2.6.16.46-0.12-027test011/ipc/shm.c
--- linux-2.6.16.46-0.12.orig/ipc/shm.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/ipc/shm.c	2007-08-28 17:35:36.000000000 +0400
@@ -32,9 +32,13 @@
 #include <linux/capability.h>
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
+#include <linux/shmem_fs.h>
 
 #include <asm/uaccess.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+
 #include "util.h"
 
 static struct file_operations shm_file_operations;
@@ -48,9 +52,11 @@ static struct ipc_ids shm_ids;
 #define shm_buildid(id, seq) \
 	ipc_buildid(&shm_ids, id, seq)
 
-static int newseg (key_t key, int shmflg, size_t size);
+static int newseg (key_t key, int shmid, int shmflg, size_t size);
 static void shm_open (struct vm_area_struct *shmd);
 static void shm_close (struct vm_area_struct *shmd);
+static void shm_destroy (struct shmid_kernel *shmd);
+static void do_shm_rmid(struct shmid_kernel *shp);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
 #endif
@@ -70,6 +76,68 @@ void __init shm_init (void)
 				sysvipc_shm_proc_show);
 }
 
+#ifdef CONFIG_VE
+void __init prepare_shm(void)
+{	/* copy this file's shm table pointer, limits and page total into the VE0 structure */
+	get_ve0()->_shm_ids = &shm_ids;	/* shm_ids is not yet the per-VE macro #defined further down */
+	get_ve0()->_shm_ctlmax = shm_ctlmax;
+	get_ve0()->_shm_ctlall = shm_ctlall;
+	get_ve0()->_shm_ctlmni = shm_ctlmni;
+	get_ve0()->_shm_tot = shm_tot;
+}
+
+#define shm_ids		(*(get_exec_env()->_shm_ids))
+#define shm_ctlmax	(get_exec_env()->_shm_ctlmax)
+#define shm_ctlall	(get_exec_env()->_shm_ctlall)
+#define shm_ctlmni	(get_exec_env()->_shm_ctlmni)
+#define shm_total	(get_exec_env()->_shm_tot)
+
+void init_ve_ipc_shm(void)
+{	/* every name assigned below is a macro that resolves through get_exec_env() */
+	shm_ctlmax = SHMMAX;
+	shm_ctlall = SHMALL;
+	shm_ctlmni = SHMMNI;
+	shm_total = 0;	/* shm_total is the macro name for the _shm_tot field */
+	ipc_init_ids(&shm_ids, 1);	/* table starts with size 1, not SHMMNI */
+}
+
+void cleanup_ve_ipc_shm(void)
+{
+	struct shmid_kernel *seg;
+	int slot;
+
+	/* Remove every shm segment still registered in the current VE. */
+	down(&shm_ids.sem);
+	for (slot = 0; slot <= shm_ids.max_id; slot++) {
+		seg = shm_lock(slot);
+		/* do_shm_rmid() drops the segment lock on both of its paths */
+		if (seg != NULL)
+			do_shm_rmid(seg);
+	}
+	up(&shm_ids.sem);
+}
+#define sb_ve(sb)		((sb)->s_type->owner_env)	/* VE recorded on sb's fs type */
+#define shm_total_sb(sb)	(&sb_ve(sb)->_shm_tot)	/* address of that VE's shm page total */
+#define shm_lock_sb(id, sb)	((struct shmid_kernel *) \
+		ipc_lock(sb_ve(sb)->_shm_ids, (id)))
+#else
+/* renamed since there is a struct field named shm_tot */
+#define shm_total		shm_tot
+#define shm_total_sb(sb)	(&shm_tot)
+#define shm_lock_sb(id, sb)	shm_lock(id)
+#endif
+
+static void do_shm_rmid(struct shmid_kernel *shp)
+{
+	if (!shp->shm_nattch) {
+		shm_destroy(shp);	/* no attaches left: destroy right away */
+		return;
+	}
+	shp->shm_perm.mode |= SHM_DEST;		/* still attached: only mark it */
+	shp->shm_perm.key = IPC_PRIVATE;	/* Do not find it any more */
+	shm_unlock(shp);
+}
+
 static inline int shm_checkid(struct shmid_kernel *s, int id)
 {
 	if (ipc_checkid(&shm_ids,&s->shm_perm,id))
@@ -77,25 +145,25 @@ static inline int shm_checkid(struct shm
 	return 0;
 }
 
-static inline struct shmid_kernel *shm_rmid(int id)
+static inline struct shmid_kernel *shm_rmid(struct ipc_ids *ids, int id)
 {
-	return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
+	return (struct shmid_kernel *)ipc_rmid(ids,id);
 }
 
-static inline int shm_addid(struct shmid_kernel *shp)
+static inline int shm_addid(struct shmid_kernel *shp, int reqid)
 {
-	return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
+	return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni, reqid);
 }
 
 
 
-static inline void shm_inc (int id) {
+static inline void shm_inc(int id, struct super_block *sb) {
 	struct shmid_kernel *shp;
 
-	if(!(shp = shm_lock(id)))
+	if(!(shp = shm_lock_sb(id, sb)))
 		BUG();
 	shp->shm_atim = get_seconds();
-	shp->shm_lprid = current->tgid;
+	shp->shm_lprid = virt_tgid(current);
 	shp->shm_nattch++;
 	shm_unlock(shp);
 }
@@ -103,7 +171,50 @@ static inline void shm_inc (int id) {
 /* This is called by fork, once for every shm attach. */
 static void shm_open (struct vm_area_struct *shmd)
 {
-	shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
+	shm_inc(shmd->vm_file->f_dentry->d_inode->i_ino,
+			shmd->vm_file->f_dentry->d_inode->i_sb);
+}
+
+static int shmem_lock(struct shmid_kernel *shp, int lock,
+		struct user_struct *user)
+{	/* lock != 0 charges, lock == 0 uncharges; returns 0 or -ENOMEM */
+	struct file *file = shp->shm_file;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	unsigned long size;
+
+	size = shp->shm_segsz + PAGE_SIZE - 1;	/* value handed to the ub_lockedshm_* calls */
+
+#ifdef CONFIG_SHMEM
+	spin_lock(&info->lock);
+	if (lock && !(info->flags & VM_LOCKED)) {	/* skip if already VM_LOCKED */
+		if (ub_lockedshm_charge(info, size) < 0)
+			goto out_ch;
+
+		if (!user_shm_lock(inode->i_size, user))
+			goto out_user;
+		info->flags |= VM_LOCKED;
+	}
+	if (!lock && (info->flags & VM_LOCKED) && user) {	/* unlock needs a non-NULL user */
+		ub_lockedshm_uncharge(info, size);
+		user_shm_unlock(inode->i_size, user);
+		info->flags &= ~VM_LOCKED;
+	}
+	spin_unlock(&info->lock);
+	return 0;
+
+out_user:	/* user_shm_lock() refused: give the charge taken above back */
+	ub_lockedshm_uncharge(info, size);
+out_ch:
+	spin_unlock(&info->lock);
+	return -ENOMEM;
+#else	/* !CONFIG_SHMEM: only the ub_lockedshm_* accounting, no flags and no user_shm_* */
+	if (lock && ub_lockedshm_charge(info, size))
+		return -ENOMEM;
+	if (!lock)
+		ub_lockedshm_uncharge(info, size);
+	return 0;
+#endif	/* CONFIG_SHMEM */
+}
 
 /*
@@ -116,15 +227,24 @@ static void shm_open (struct vm_area_str
  */
 static void shm_destroy (struct shmid_kernel *shp)
 {
-	shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	shm_rmid (shp->id);
+	int numpages, *shm_totalp;
+	struct file *f;
+	struct super_block *sb;
+
+	f = shp->shm_file;
+	sb = f->f_dentry->d_inode->i_sb;
+	numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	shm_totalp = shm_total_sb(sb);
+	*shm_totalp -= numpages;
+
+	shm_rmid (shp->_shm_ids, shp->id);
 	shm_unlock(shp);
 	if (!is_file_hugepages(shp->shm_file))
-		shmem_lock(shp->shm_file, 0, shp->mlock_user);
+		shmem_lock(shp, 0, shp->mlock_user);
 	else
 		user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size,
 						shp->mlock_user);
-	fput (shp->shm_file);
+	fput(f);
 	security_shm_free(shp);
 	ipc_rcu_putref(shp);
 }
@@ -140,12 +260,24 @@ static void shm_close (struct vm_area_st
 	struct file * file = shmd->vm_file;
 	int id = file->f_dentry->d_inode->i_ino;
 	struct shmid_kernel *shp;
+	struct super_block *sb;
+	struct ipc_ids *ids;
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+
+	sb = file->f_dentry->d_inode->i_sb;
+	ve = get_ve(sb_ve(sb));
+	ids = ve->_shm_ids;
+#else
+	sb = file->f_dentry->d_inode->i_sb;
+	ids = &shm_ids;
+#endif
 
-	down (&shm_ids.sem);
+	down (&ids->sem);
 	/* remove from the list of attaches of the shm segment */
-	if(!(shp = shm_lock(id)))
+	if(!(shp = shm_lock_sb(id, sb)))
 		BUG();
-	shp->shm_lprid = current->tgid;
+	shp->shm_lprid = virt_tgid(current);
 	shp->shm_dtim = get_seconds();
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
@@ -153,7 +285,10 @@ static void shm_close (struct vm_area_st
 		shm_destroy (shp);
 	else
 		shm_unlock(shp);
-	up (&shm_ids.sem);
+	up(&ids->sem);
+#ifdef CONFIG_VE
+	put_ve(ve);
+#endif
 }
 
 static int shm_mmap(struct file * file, struct vm_area_struct * vma)
@@ -165,7 +300,8 @@ static int shm_mmap(struct file * file, 
 		vma->vm_ops = &shm_vm_ops;
 		if (!(vma->vm_flags & VM_WRITE))
 			vma->vm_flags &= ~VM_MAYWRITE;
-		shm_inc(file->f_dentry->d_inode->i_ino);
+		shm_inc(file->f_dentry->d_inode->i_ino,
+				file->f_dentry->d_inode->i_sb);
 	}
 
 	return ret;
@@ -188,19 +324,19 @@ static struct vm_operations_struct shm_v
 #endif
 };
 
-static int newseg (key_t key, int shmflg, size_t size)
+static int newseg (key_t key, int shmid, int shmflg, size_t size)
 {
 	int error;
 	struct shmid_kernel *shp;
 	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
 	struct file * file;
-	char name[13];
+	char name[26];
 	int id;
 
 	if (size < SHMMIN || size > shm_ctlmax)
 		return -EINVAL;
 
-	if (shm_tot + numpages >= shm_ctlall)
+	if (shm_total + numpages >= shm_ctlall)
 		return -ENOSPC;
 
 	shp = ipc_rcu_alloc(sizeof(*shp));
@@ -231,7 +367,11 @@ static int newseg (key_t key, int shmflg
 		if  ((shmflg & SHM_NORESERVE) &&
 				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
 			acctflag = 0;
+#ifdef CONFIG_VE
+		sprintf (name, "VE%d.SYSV%08x", get_exec_env()->veid, key);
+#else
 		sprintf (name, "SYSV%08x", key);
+#endif
 		file = shmem_file_setup(name, size, acctflag);
 	}
 	error = PTR_ERR(file);
@@ -239,17 +379,18 @@ static int newseg (key_t key, int shmflg
 		goto no_file;
 
 	error = -ENOSPC;
-	id = shm_addid(shp);
+	id = shm_addid(shp, shmid);
 	if(id == -1) 
 		goto no_id;
 
-	shp->shm_cprid = current->tgid;
+	shp->shm_cprid = virt_tgid(current);
 	shp->shm_lprid = 0;
 	shp->shm_atim = shp->shm_dtim = 0;
 	shp->shm_ctim = get_seconds();
 	shp->shm_segsz = size;
 	shp->shm_nattch = 0;
 	shp->id = shm_buildid(id,shp->shm_perm.seq);
+	shp->_shm_ids = &shm_ids;
 	shp->shm_file = file;
 	file->f_dentry->d_inode->i_ino = shp->id;
 
@@ -257,7 +398,7 @@ static int newseg (key_t key, int shmflg
 	if (!(shmflg & SHM_HUGETLB))
 		file->f_op = &shm_file_operations;
 
-	shm_tot += numpages;
+	shm_total += numpages;
 	shm_unlock(shp);
 	return shp->id;
 
@@ -276,12 +417,12 @@ asmlinkage long sys_shmget (key_t key, s
 
 	down(&shm_ids.sem);
 	if (key == IPC_PRIVATE) {
-		err = newseg(key, shmflg, size);
+		err = newseg(key, -1, shmflg, size);
 	} else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
 		if (!(shmflg & IPC_CREAT))
 			err = -ENOENT;
 		else
-			err = newseg(key, shmflg, size);
+			err = newseg(key, -1, shmflg, size);
 	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
 		err = -EEXIST;
 	} else {
@@ -474,7 +615,7 @@ asmlinkage long sys_shmctl (int shmid, i
 		down(&shm_ids.sem);
 		shm_info.used_ids = shm_ids.in_use;
 		shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
-		shm_info.shm_tot = shm_tot;
+		shm_info.shm_tot = shm_total;
 		shm_info.swap_attempts = 0;
 		shm_info.swap_successes = 0;
 		err = shm_ids.max_id;
@@ -565,14 +706,15 @@ asmlinkage long sys_shmctl (int shmid, i
 		if(cmd==SHM_LOCK) {
 			struct user_struct * user = current->user;
 			if (!is_file_hugepages(shp->shm_file)) {
-				err = shmem_lock(shp->shm_file, 1, user);
-				if (!err) {
+				err = shmem_lock(shp, 1, user);
+				if (!err &&
+				    !(shp->shm_perm.mode & SHM_LOCKED)) {
 					shp->shm_perm.mode |= SHM_LOCKED;
 					shp->mlock_user = user;
 				}
 			}
 		} else if (!is_file_hugepages(shp->shm_file)) {
-			shmem_lock(shp->shm_file, 0, shp->mlock_user);
+			shmem_lock(shp, 0, shp->mlock_user);
 			shp->shm_perm.mode &= ~SHM_LOCKED;
 			shp->mlock_user = NULL;
 		}
@@ -606,7 +748,7 @@ asmlinkage long sys_shmctl (int shmid, i
 
 		if (current->euid != shp->shm_perm.uid &&
 		    current->euid != shp->shm_perm.cuid && 
-		    !capable(CAP_SYS_ADMIN)) {
+		    !capable(CAP_VE_SYS_ADMIN)) {
 			err=-EPERM;
 			goto out_unlock_up;
 		}
@@ -615,13 +757,7 @@ asmlinkage long sys_shmctl (int shmid, i
 		if (err)
 			goto out_unlock_up;
 
-		if (shp->shm_nattch){
-			shp->shm_perm.mode |= SHM_DEST;
-			/* Do not find it any more */
-			shp->shm_perm.key = IPC_PRIVATE;
-			shm_unlock(shp);
-		} else
-			shm_destroy (shp);
+		do_shm_rmid(shp);
 		up(&shm_ids.sem);
 		goto out;
 	}
@@ -649,7 +785,7 @@ asmlinkage long sys_shmctl (int shmid, i
 		err=-EPERM;
 		if (current->euid != shp->shm_perm.uid &&
 		    current->euid != shp->shm_perm.cuid && 
-		    !capable(CAP_SYS_ADMIN)) {
+		    !capable(CAP_VE_SYS_ADMIN)) {
 			goto out_unlock_up;
 		}
 
@@ -932,3 +1068,55 @@ static int sysvipc_shm_proc_show(struct 
 			  shp->shm_ctim);
 }
 #endif
+
+#ifdef CONFIG_VE
+#include <linux/module.h>
+
+struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg)
+{
+	struct file *result = ERR_PTR(-EINVAL);
+	struct shmid_kernel *seg;
+	int rc;
+
+	/* Return segment shmid's file with a reference taken; create it if absent. */
+	down(&shm_ids.sem);
+	seg = shm_lock(shmid);
+	if (seg == NULL) {
+		rc = newseg(key, shmid, shmflg, size);
+		if (rc < 0) {
+			result = ERR_PTR(rc);
+			goto unlock_ids;
+		}
+		seg = shm_lock(shmid);
+	}
+	if (seg != NULL) {
+		result = seg->shm_file;
+		get_file(result);
+		shm_unlock(seg);
+	}
+unlock_ids:
+	up(&shm_ids.sem);
+	return result;
+}
+EXPORT_SYMBOL_GPL(sysvipc_setup_shm);
+
+int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg)
+{
+	struct shmid_kernel *seg;
+	int slot, rc = 0;
+
+	/* Call func on each segment of the current VE; first nonzero result ends it. */
+	down(&shm_ids.sem);
+	for (slot = 0; rc == 0 && slot <= shm_ids.max_id; slot++) {
+		seg = shm_lock(slot);
+		if (seg == NULL)
+			continue;
+		/* func runs between shm_lock() and shm_unlock() */
+		rc = func(seg, arg);
+		shm_unlock(seg);
+	}
+	up(&shm_ids.sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(sysvipc_walk_shm);
+#endif
diff -upr linux-2.6.16.46-0.12.orig/ipc/util.c linux-2.6.16.46-0.12-027test011/ipc/util.c
--- linux-2.6.16.46-0.12.orig/ipc/util.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/ipc/util.c	2007-08-28 17:35:33.000000000 +0400
@@ -15,6 +15,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/shm.h>
 #include <linux/init.h>
@@ -33,6 +34,8 @@
 
 #include <asm/unistd.h>
 
+#include <ub/ub_mem.h>
+
 #include "util.h"
 
 struct ipc_proc_iface {
@@ -68,7 +71,7 @@ __initcall(ipc_init);
  *	array itself. 
  */
  
-void __init ipc_init_ids(struct ipc_ids* ids, int size)
+void __ve_init ipc_init_ids(struct ipc_ids* ids, int size)
 {
 	int i;
 	sema_init(&ids->sem,1);
@@ -97,7 +100,21 @@ void __init ipc_init_ids(struct ipc_ids*
 	ids->entries->size = size;
 	for(i=0;i<size;i++)
 		ids->entries->p[i] = NULL;
+
+	ids->owner_env = get_exec_env();
+}
+
+#ifdef CONFIG_VE
+static inline void ipc_free_ids(struct ipc_ids *ids)
+{
+	/* A NULL table is accepted and ignored. */
+	if (ids != NULL) {
+		if (ids->entries != &ids->nullentry)
+			ipc_rcu_putref(ids->entries);
+		kfree(ids);
+	}
 }
+#endif
 
 #ifdef CONFIG_PROC_FS
 static struct file_operations sysvipc_proc_fops;
@@ -215,10 +232,20 @@ static int grow_ary(struct ipc_ids* ids,
  *	Called with ipc_ids.sem held.
  */
  
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
+int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid)
 {
 	int id;
 
+	if (reqid >= 0) {
+		id = reqid%SEQ_MULTIPLIER;
+		size = grow_ary(ids,id+1);
+		if (id >= size)
+			return -1;
+		if (ids->entries->p[id] == NULL)
+			goto found;
+		return -1;
+	}
+
 	size = grow_ary(ids,size);
 
 	/*
@@ -231,16 +258,21 @@ int ipc_addid(struct ipc_ids* ids, struc
 	}
 	return -1;
 found:
-	ids->in_use++;
+	if (ids->in_use++ == 0)
+		(void)get_ve(ids->owner_env);
 	if (id > ids->max_id)
 		ids->max_id = id;
 
 	new->cuid = new->uid = current->euid;
 	new->gid = new->cgid = current->egid;
 
-	new->seq = ids->seq++;
-	if(ids->seq > ids->seq_max)
-		ids->seq = 0;
+	if (reqid >= 0) {
+		new->seq = reqid/SEQ_MULTIPLIER;
+	} else {
+		new->seq = ids->seq++;
+		if(ids->seq > ids->seq_max)
+			ids->seq = 0;
+	}
 
 	spin_lock_init(&new->lock);
 	new->deleted = 0;
@@ -278,7 +310,8 @@ struct kern_ipc_perm* ipc_rmid(struct ip
 	ids->entries->p[lid] = NULL;
 	if(p==NULL)
 		BUG();
-	ids->in_use--;
+	if (--ids->in_use == 0)
+		put_ve(ids->owner_env);
 
 	if (lid == ids->max_id) {
 		do {
@@ -304,9 +337,9 @@ void* ipc_alloc(int size)
 {
 	void* out;
 	if(size > PAGE_SIZE)
-		out = vmalloc(size);
+		out = ub_vmalloc(size);
 	else
-		out = kmalloc(size, GFP_KERNEL);
+		out = ub_kmalloc(size, GFP_KERNEL);
 	return out;
 }
 
@@ -389,14 +422,14 @@ void* ipc_rcu_alloc(int size)
 	 * workqueue if necessary (for vmalloc). 
 	 */
 	if (rcu_use_vmalloc(size)) {
-		out = vmalloc(HDRLEN_VMALLOC + size);
+		out = ub_vmalloc(HDRLEN_VMALLOC + size);
 		if (out) {
 			out += HDRLEN_VMALLOC;
 			container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
 			container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
 		}
 	} else {
-		out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
+		out = ub_kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
 		if (out) {
 			out += HDRLEN_KMALLOC;
 			container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
@@ -607,6 +640,71 @@ int ipc_checkid(struct ipc_ids* ids, str
 	return 0;
 }
 
+#ifdef CONFIG_VE
+void __init prepare_ipc(void)
+{	/* __init hook: runs the msg, sem and shm prepare_*() helpers in turn */
+	prepare_msg();
+	prepare_sem();
+	prepare_shm();
+}
+
+/* Allocate the msg/sem/shm id tables of a VE; returns 0 or -ENOMEM. */
+int init_ve_ipc(struct ve_struct * envid)
+{
+	const size_t ids_sz = sizeof(struct ipc_ids) + sizeof(void *);
+
+	envid->_msg_ids = kmalloc(ids_sz, GFP_KERNEL);
+	if (!envid->_msg_ids)
+		goto out_nomem;
+	envid->_sem_ids = kmalloc(ids_sz, GFP_KERNEL);
+	if (!envid->_sem_ids)
+		goto out_free_msg;
+	envid->_shm_ids = kmalloc(ids_sz, GFP_KERNEL);
+	if (!envid->_shm_ids)
+		goto out_free_sem;
+
+	init_ve_ipc_msg();
+	init_ve_ipc_sem();
+	init_ve_ipc_shm();
+	return 0;
+
+out_free_sem:
+	kfree(envid->_sem_ids);
+out_free_msg:
+	kfree(envid->_msg_ids);
+out_nomem:
+	return -ENOMEM;
+}
+
+void ve_ipc_cleanup(void)
+{	/* runs the msg, sem and shm cleanup_ve_ipc_*() helpers in turn */
+	cleanup_ve_ipc_msg();
+	cleanup_ve_ipc_sem();
+	cleanup_ve_ipc_shm();
+}
+
+void ve_ipc_free(struct ve_struct *env)
+{	/* hand each id table of env to ipc_free_ids(), then clear the pointers */
+	ipc_free_ids(env->_msg_ids);
+	ipc_free_ids(env->_sem_ids);
+	ipc_free_ids(env->_shm_ids);
+	env->_msg_ids = NULL;	/* no stale pointers are left in env */
+	env->_sem_ids = NULL;
+	env->_shm_ids = NULL;
+}
+
+void fini_ve_ipc(struct ve_struct *ptr)
+{	/* full teardown: ve_ipc_cleanup() first, then ve_ipc_free(ptr) */
+	ve_ipc_cleanup();
+	ve_ipc_free(ptr);
+}
+
+EXPORT_SYMBOL(init_ve_ipc);
+EXPORT_SYMBOL(ve_ipc_cleanup);
+EXPORT_SYMBOL(ve_ipc_free);
+EXPORT_SYMBOL(fini_ve_ipc);
+#endif /* CONFIG_VE */
+
 #ifdef __ARCH_WANT_IPC_PARSE_VERSION
 
 
diff -upr linux-2.6.16.46-0.12.orig/ipc/util.h linux-2.6.16.46-0.12-027test011/ipc/util.h
--- linux-2.6.16.46-0.12.orig/ipc/util.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/ipc/util.h	2007-08-28 17:35:33.000000000 +0400
@@ -15,6 +15,22 @@ void sem_init (void);
 void msg_init (void);
 void shm_init (void);
 
+#ifdef CONFIG_VE
+void prepare_msg(void);
+void prepare_sem(void);
+void prepare_shm(void);
+void init_ve_ipc_msg(void);
+void init_ve_ipc_sem(void);
+void init_ve_ipc_shm(void);
+void cleanup_ve_ipc_msg(void);
+void cleanup_ve_ipc_sem(void);
+void cleanup_ve_ipc_shm(void);
+
+#define __ve_init
+#else
+#define __ve_init	__init
+#endif
+
 struct ipc_id_ary {
 	int size;
 	struct kern_ipc_perm *p[0];
@@ -28,10 +44,11 @@ struct ipc_ids {
 	struct semaphore sem;	
 	struct ipc_id_ary nullentry;
 	struct ipc_id_ary* entries;
+	struct ve_struct *owner_env;
 };
 
 struct seq_file;
-void __init ipc_init_ids(struct ipc_ids* ids, int size);
+void __ve_init ipc_init_ids(struct ipc_ids *ids, int size);
 #ifdef CONFIG_PROC_FS
 void __init ipc_init_proc_interface(const char *path, const char *header,
 				    struct ipc_ids *ids,
@@ -42,7 +59,7 @@ void __init ipc_init_proc_interface(cons
 
 /* must be called with ids->sem acquired.*/
 int ipc_findkey(struct ipc_ids* ids, key_t key);
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
+int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid);
 
 /* must be called with both locks acquired. */
 struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id);
diff -upr linux-2.6.16.46-0.12.orig/kdb/kdb_bt.c linux-2.6.16.46-0.12-027test011/kdb/kdb_bt.c
--- linux-2.6.16.46-0.12.orig/kdb/kdb_bt.c	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kdb/kdb_bt.c	2007-08-28 17:35:31.000000000 +0400
@@ -118,7 +118,7 @@ kdb_bt(int argc, const char **argv, cons
 			return KDB_ARGCOUNT;
 		if ((diag = kdbgetularg((char *)argv[1], &pid)))
 			return diag;
-		if ((p = find_task_by_pid(pid))) {
+		if ((p = find_task_by_pid_all(pid))) {
 			kdba_set_current_task(p);
 			return kdb_bt1(p, ~0UL, argcount, 0);
 		}
diff -upr linux-2.6.16.46-0.12.orig/kdb/kdbmain.c linux-2.6.16.46-0.12-027test011/kdb/kdbmain.c
--- linux-2.6.16.46-0.12.orig/kdb/kdbmain.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kdb/kdbmain.c	2007-08-28 17:35:31.000000000 +0400
@@ -3196,7 +3196,7 @@ kdb_pid(int argc, const char **argv, con
 			if (diag)
 				return KDB_BADINT;
 
-			p = find_task_by_pid((pid_t)val);
+			p = find_task_by_pid_all((pid_t)val);
 			if (!p) {
 				kdb_printf("No task with pid=%d\n", (pid_t)val);
 				return 0;
@@ -3370,7 +3370,7 @@ kdb_kill(int argc, const char **argv, co
 	}
 
 	/* Find the process. */
-	if (!(p = find_task_by_pid(pid))) {
+	if (!(p = find_task_by_pid_all(pid))) {
 		kdb_printf("The specified process isn't found.\n");
 		return 0;
 	}
diff -upr linux-2.6.16.46-0.12.orig/kdb/modules/kdbm_vm.c linux-2.6.16.46-0.12-027test011/kdb/modules/kdbm_vm.c
--- linux-2.6.16.46-0.12.orig/kdb/modules/kdbm_vm.c	2007-08-24 19:28:16.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kdb/modules/kdbm_vm.c	2007-08-28 17:35:31.000000000 +0400
@@ -270,7 +270,7 @@ kdbm_pte(int argc, const char **argv, co
 		}
 
 		found = 0;
-		for_each_process(tp) {
+		for_each_process_all(tp) {
 			if (tp->pid == pid) {
 				if (tp->mm != NULL) {
 					found = 1;
@@ -383,7 +383,7 @@ kdbm_rpte(int argc, const char **argv, c
 		}
 
 		found = 0;
-		for_each_process(tp) {
+		for_each_process_all(tp) {
 			if (tp->pid == pid) {
 				if (tp->mm != NULL) {
 					found = 1;
diff -upr linux-2.6.16.46-0.12.orig/kernel/Kconfig.fairsched linux-2.6.16.46-0.12-027test011/kernel/Kconfig.fairsched
--- linux-2.6.16.46-0.12.orig/kernel/Kconfig.fairsched	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/Kconfig.fairsched	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,22 @@
+config SCHED_VCPU
+	bool "VCPU scheduler support"
+	default n
+	help
+	  VCPU scheduler support adds an additional layer of abstraction
+	  that virtualizes the notion of a CPU, separating physical CPUs
+	  from virtual CPUs. It makes it possible to use the fair CPU scheduler,
+	  dynamically add/remove CPUs to/from a VPS, and so on.
+
+config FAIRSCHED
+	bool "Fair CPU scheduler (EXPERIMENTAL)"
+	depends on SCHED_VCPU
+	default SCHED_VCPU
+	help
+	  Config option for the Fair CPU scheduler (fairsched).
+	  This option allows grouping processes into scheduling nodes
+	  which receive CPU time proportional to their weight.
+	  This is a very important feature for process group isolation and
+	  QoS management.
+
+	  If unsure, say N.
+
diff -upr linux-2.6.16.46-0.12.orig/kernel/Kconfig.openvz linux-2.6.16.46-0.12-027test011/kernel/Kconfig.openvz
--- linux-2.6.16.46-0.12.orig/kernel/Kconfig.openvz	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/Kconfig.openvz	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,73 @@
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+menu "OpenVZ"
+
+config VE
+	bool "Virtual Environment support"
+	default y
+	help
+	  This option adds support of virtual Linux running on the original box
+	  with fully supported virtual network driver, tty subsystem and
+	  configurable access for hardware and other resources.
+
+config VE_CALLS
+	tristate "VE calls interface"
+	depends on VE
+	default m
+	help
+	  This option controls how to build the vzmon code containing VE calls.
+	  By default it is built as the vzmon.o module.
+
+config VZ_GENCALLS
+	bool
+	default y
+
+config VE_NETDEV
+	tristate "VE network device"
+	depends on VE_CALLS && NET
+	default m
+	help
+	  This option controls whether to build venet device. This is a
+	  common interface for networking in VE.
+
+config VE_ETHDEV
+	tristate "Virtual ethernet device"
+	depends on VE_CALLS && NET
+	default m
+	help
+	  This option controls whether to build virtual ethernet device.
+
+config VZ_DEV
+	tristate "VE device"
+	default m
+	help
+	  This option adds support of vzdev device, which is used by
+	  user-space applications to control Virtual Environments.
+
+config VE_IPTABLES
+	bool "VE netfiltering"
+	depends on VE && VE_NETDEV && INET && NETFILTER
+	default y
+	help
+	  This option controls whether to build VE netfiltering code.
+
+config VZ_WDOG
+	tristate "VE watchdog module"
+	depends on VE_CALLS
+	default m
+	help
+	  This option controls building of vzwdog module, which dumps
+	  a lot of useful system info on console periodically.
+
+config VZ_CHECKPOINT
+ 	tristate "Checkpointing & restoring Virtual Environments"
+	depends on VE_CALLS && INET
+ 	default m
+ 	help
+ 	  This option adds two modules, "cpt" and "rst", which allow
+ 	  to save a running Virtual Environment and restore it
+ 	  on another host (live migration) or on the same host (checkpointing).
+
+endmenu
diff -upr linux-2.6.16.46-0.12.orig/kernel/Makefile linux-2.6.16.46-0.12-027test011/kernel/Makefile
--- linux-2.6.16.46-0.12.orig/kernel/Makefile	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/Makefile	2007-08-28 17:35:34.000000000 +0400
@@ -2,7 +2,8 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+obj-y     = sched.o fairsched.o \
+	    fork.o exec_domain.o panic.o printk.o profile.o \
 	    exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
@@ -10,6 +11,9 @@ obj-y     = sched.o fork.o exec_domain.o
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o
 
+obj-$(CONFIG_USER_RESOURCE) += ub/
+obj-$(CONFIG_VE) += ve/
+obj-$(CONFIG_VZ_CHECKPOINT) += cpt/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
diff -upr linux-2.6.16.46-0.12.orig/kernel/audit.c linux-2.6.16.46-0.12-027test011/kernel/audit.c
--- linux-2.6.16.46-0.12.orig/kernel/audit.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/audit.c	2007-08-28 17:35:32.000000000 +0400
@@ -489,6 +489,9 @@ static int audit_receive_msg(struct sk_b
 	char			*ctx;
 	u32			len;
 
+	if (!ve_is_super(skb->owner_env))
+		return -ECONNREFUSED;
+
 	err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
 	if (err)
 		return err;
diff -upr linux-2.6.16.46-0.12.orig/kernel/auditfilter.c linux-2.6.16.46-0.12-027test011/kernel/auditfilter.c
--- linux-2.6.16.46-0.12.orig/kernel/auditfilter.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/auditfilter.c	2007-08-28 17:35:33.000000000 +0400
@@ -167,7 +167,7 @@ static struct audit_parent *audit_init_p
 	inotify_init_watch(&parent->wdata);
 	/* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
 	get_inotify_watch(&parent->wdata);
-	wd = inotify_add_watch(audit_ih, &parent->wdata, ndp->dentry->d_inode,
+	wd = inotify_add_watch_dget(audit_ih, &parent->wdata, ndp->dentry, ndp->mnt,
 			       AUDIT_IN_WATCH);
 	if (wd < 0) {
 		audit_free_parent(&parent->wdata);
diff -upr linux-2.6.16.46-0.12.orig/kernel/capability.c linux-2.6.16.46-0.12-027test011/kernel/capability.c
--- linux-2.6.16.46-0.12.orig/kernel/capability.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/kernel/capability.c	2007-08-28 17:35:32.000000000 +0400
@@ -15,16 +15,18 @@
 #include <asm/uaccess.h>
 
 unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
-kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
-
 EXPORT_SYMBOL(securebits);
+#ifndef CONFIG_VE
+kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
 EXPORT_SYMBOL(cap_bset);
+#endif
 
 /*
  * This lock protects task->cap_* for all tasks including current.
  * Locking rule: acquire this prior to tasklist_lock.
  */
-static DEFINE_SPINLOCK(task_capability_lock);
+DEFINE_SPINLOCK(task_capability_lock);
+EXPORT_SYMBOL(task_capability_lock);
 
 /*
  * For sys_getproccap() and sys_setproccap(), any of the three
@@ -67,8 +69,8 @@ asmlinkage long sys_capget(cap_user_head
      spin_lock(&task_capability_lock);
      read_lock(&tasklist_lock); 
 
-     if (pid && pid != current->pid) {
-	     target = find_task_by_pid(pid);
+     if (pid && pid != virt_pid(current)) {
+	     target = find_task_by_pid_ve(pid);
 	     if (!target) {
 	          ret = -ESRCH;
 	          goto out;
@@ -100,9 +102,13 @@ static inline int cap_set_pg(int pgrp, k
 	int ret = -EPERM;
 	int found = 0;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
+	pgrp = vpid_to_pid(pgrp);
+	if (pgrp < 0)
+		return ret;
+
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, g) {
 		target = g;
-		while_each_thread(g, target) {
+		while_each_thread_ve(g, target) {
 			if (!security_capset_check(target, effective,
 							inheritable,
 							permitted)) {
@@ -113,7 +119,7 @@ static inline int cap_set_pg(int pgrp, k
 			}
 			found = 1;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, g);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, g);
 
 	if (!found)
 	     ret = 0;
@@ -132,7 +138,7 @@ static inline int cap_set_all(kernel_cap
      int ret = -EPERM;
      int found = 0;
 
-     do_each_thread(g, target) {
+     do_each_thread_ve(g, target) {
              if (target == current || target->pid == 1)
                      continue;
              found = 1;
@@ -141,7 +147,7 @@ static inline int cap_set_all(kernel_cap
 		     continue;
 	     ret = 0;
 	     security_capset_set(target, effective, inheritable, permitted);
-     } while_each_thread(g, target);
+     } while_each_thread_ve(g, target);
 
      if (!found)
 	     ret = 0;
@@ -188,7 +194,7 @@ asmlinkage long sys_capset(cap_user_head
      if (get_user(pid, &header->pid))
 	     return -EFAULT; 
 
-     if (pid && pid != current->pid && !capable(CAP_SETPCAP))
+     if (pid && pid != virt_pid(current) && !capable(CAP_SETPCAP))
              return -EPERM;
 
      if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
@@ -199,8 +205,8 @@ asmlinkage long sys_capset(cap_user_head
      spin_lock(&task_capability_lock);
      read_lock(&tasklist_lock);
 
-     if (pid > 0 && pid != current->pid) {
-          target = find_task_by_pid(pid);
+     if (pid > 0 && pid != virt_pid(current)) {
+          target = find_task_by_pid_ve(pid);
           if (!target) {
                ret = -ESRCH;
                goto out;
diff -upr linux-2.6.16.46-0.12.orig/kernel/compat.c linux-2.6.16.46-0.12-027test011/kernel/compat.c
--- linux-2.6.16.46-0.12.orig/kernel/compat.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/kernel/compat.c	2007-08-28 17:35:33.000000000 +0400
@@ -21,6 +21,8 @@
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
 #include <linux/security.h>
+#include <linux/hrtimer.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 
@@ -38,61 +40,73 @@ int put_compat_timespec(const struct tim
 			__put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
 }
 
-static long compat_nanosleep_restart(struct restart_block *restart)
+long compat_nanosleep_restart(struct restart_block *restart)
 {
-	unsigned long expire = restart->arg0, now = jiffies;
 	struct compat_timespec __user *rmtp;
+	struct timespec tu;
+	void *rfn_save = restart->fn;
+	struct hrtimer timer;
+	ktime_t rem;
 
-	/* Did it expire while we handled signals? */
-	if (!time_after(expire, now))
-		return 0;
+	restart->fn = do_no_restart_syscall;
+
+	hrtimer_init(&timer, (clockid_t) restart->arg3, HRTIMER_ABS);
+
+	timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0;
 
-	expire = schedule_timeout_interruptible(expire - now);
-	if (expire == 0)
+	set_current_state(TASK_INTERRUPTIBLE);
+	rem = schedule_hrtimer(&timer, HRTIMER_ABS);
+
+	if (rem.tv64 <= 0)
 		return 0;
 
-	rmtp = (struct compat_timespec __user *)restart->arg1;
-	if (rmtp) {
-		struct compat_timespec ct;
-		struct timespec t;
-
-		jiffies_to_timespec(expire, &t);
-		ct.tv_sec = t.tv_sec;
-		ct.tv_nsec = t.tv_nsec;
-		if (copy_to_user(rmtp, &ct, sizeof(ct)))
-			return -EFAULT;
-	}
-	/* The 'restart' block is already filled in */
+	rmtp = (struct compat_timespec __user *) restart->arg2;
+	tu = ktime_to_timespec(rem);
+	if (rmtp && put_compat_timespec(&tu, rmtp))
+		return -EFAULT;
+
+	restart->fn = rfn_save;
+
+	/* The other values in restart are already filled in */
 	return -ERESTART_RESTARTBLOCK;
 }
+EXPORT_SYMBOL_GPL(compat_nanosleep_restart);
 
 asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
 		struct compat_timespec __user *rmtp)
 {
 	struct timespec t;
 	struct restart_block *restart;
-	unsigned long expire;
+	struct hrtimer timer;
+	ktime_t rem;
 
 	if (get_compat_timespec(&t, rqtp))
 		return -EFAULT;
 
-	if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0))
+	if (!timespec_valid(&t))
 		return -EINVAL;
 
-	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
-	expire = schedule_timeout_interruptible(expire);
-	if (expire == 0)
+	hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_REL);
+
+	timer.expires = timespec_to_ktime(t);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	rem = schedule_hrtimer(&timer, HRTIMER_REL);
+	if (rem.tv64 <= 0)
 		return 0;
 
-	if (rmtp) {
-		jiffies_to_timespec(expire, &t);
-		if (put_compat_timespec(&t, rmtp))
-			return -EFAULT;
-	}
+	t = ktime_to_timespec(rem);
+
+	if (rmtp && put_compat_timespec(&t, rmtp))
+		return -EFAULT;
+
 	restart = &current_thread_info()->restart_block;
 	restart->fn = compat_nanosleep_restart;
-	restart->arg0 = jiffies + expire;
-	restart->arg1 = (unsigned long) rmtp;
+	restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF;
+	restart->arg1 = timer.expires.tv64 >> 32;
+	restart->arg2 = (unsigned long) rmtp;
+	restart->arg3 = (unsigned long) timer.base->index;
+
 	return -ERESTART_RESTARTBLOCK;
 }
 
diff -upr linux-2.6.16.46-0.12.orig/kernel/configs.c linux-2.6.16.46-0.12-027test011/kernel/configs.c
--- linux-2.6.16.46-0.12.orig/kernel/configs.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/kernel/configs.c	2007-08-28 17:35:31.000000000 +0400
@@ -62,18 +62,9 @@ static ssize_t
 ikconfig_read_current(struct file *file, char __user *buf,
 		      size_t len, loff_t * offset)
 {
-	loff_t pos = *offset;
-	ssize_t count;
-
-	if (pos >= kernel_config_data_size)
-		return 0;
-
-	count = min(len, (size_t)(kernel_config_data_size - pos));
-	if (copy_to_user(buf, kernel_config_data + MAGIC_SIZE + pos, count))
-		return -EFAULT;
-
-	*offset += count;
-	return count;
+	return simple_read_from_buffer(buf, len, offset,
+				       kernel_config_data + MAGIC_SIZE,
+				       kernel_config_data_size);
 }
 
 static struct file_operations ikconfig_file_ops = {
@@ -89,8 +80,7 @@ static int __init ikconfig_init(void)
 	struct proc_dir_entry *entry;
 
 	/* create the current config file */
-	entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
-				  &proc_root);
+	entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, NULL);
 	if (!entry)
 		return -ENOMEM;
 
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/Makefile linux-2.6.16.46-0.12-027test011/kernel/cpt/Makefile
--- linux-2.6.16.46-0.12.orig/kernel/cpt/Makefile	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/Makefile	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,56 @@
+#
+#
+#  kernel/cpt/Makefile
+#
+#  Copyright (C) 2000-2005  SWsoft
+#  All rights reserved.
+#
+#  Licensing governed by "linux/COPYING.SWsoft" file.
+
+obj-$(CONFIG_VZ_CHECKPOINT) += vzcpt.o vzrst.o
+
+vzcpt-objs := cpt_proc.o cpt_dump.o cpt_obj.o cpt_context.o cpt_process.o \
+	cpt_mm.o cpt_files.o cpt_kernel.o \
+	cpt_socket.o cpt_socket_in.o cpt_tty.o cpt_sysvipc.o cpt_net.o \
+	cpt_conntrack.o cpt_epoll.o
+
+vzrst-objs := rst_proc.o rst_undump.o rst_context.o rst_process.o \
+	rst_mm.o rst_files.o \
+	rst_socket.o rst_socket_in.o rst_tty.o rst_sysvipc.o rst_net.o \
+	rst_conntrack.o rst_epoll.o
+
+ifeq ($(CONFIG_USER_RESOURCE), y)
+vzcpt-objs += cpt_ubc.o
+endif
+
+ifeq ($(CONFIG_USER_RESOURCE), y)
+vzrst-objs += rst_ubc.o
+endif
+
+ifeq ($(CONFIG_INOTIFY_USER), y)
+vzcpt-objs += cpt_inotify.o
+vzrst-objs += rst_inotify.o
+endif
+
+vzrst-objs += cpt_exports.o
+
+ifeq ($(CONFIG_VZ_CHECKPOINT), m)
+vzrst-objs += cpt_obj.o cpt_kernel.o
+endif
+
+ifeq ($(CONFIG_VZ_CHECKPOINT_ITER), y)
+vzcpt-objs += cpt_iterative.o
+vzrst-objs += rst_iterative.o
+endif
+
+ifeq ($(CONFIG_VZ_CHECKPOINT_LAZY), y)
+vzcpt-objs += cpt_pagein.o
+vzrst-objs += rst_pagein.o
+endif
+
+ifeq ($(CONFIG_X86_64), y)
+vzcpt-objs += cpt_x8664.o
+ifeq ($(CONFIG_VZ_CHECKPOINT), m)
+vzrst-objs += cpt_x8664.o
+endif
+endif
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_conntrack.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_conntrack.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_conntrack.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_conntrack.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,366 @@
+/*
+ *
+ *  kernel/cpt/cpt_conntrack.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/unistd.h>
+#include <linux/ve.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+
+/* How does it work?
+ *
+ * Network is disabled, so new conntrack entries will not appear.
+ * However, some of them can disappear because of timeouts.
+ *
+ * So, we take read_lock, collect all required information atomically,
+ * essentially, creating parallel "refcount" structures holding pointers.
+ * We delete conntrack timers as well, so the structures cannot disappear
+ * after releasing the lock. Now, after releasing lock we can dump everything
+ * safely. And on exit we restore timers to their original values.
+ *
+ * Note, this approach is not going to work in VE0.
+ */
+
+struct ct_holder
+{
+	struct ct_holder *next;			/* next holder in the singly linked list */
+	struct ip_conntrack_tuple_hash *cth;	/* held tuple hash; NULL while the holder is unused */
+	int index;				/* number given while collecting, starting at 1 */
+};
+
+static void encode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple)
+{
+	/* Field-by-field copy into the image tuple: source, then destination. */
+	v->cpt_src = tuple->src.ip;
+	v->cpt_srcport = tuple->src.u.all;
+	v->cpt_dst = tuple->dst.ip;
+	v->cpt_dstport = tuple->dst.u.all;
+	v->cpt_protonum = tuple->dst.protonum;
+	v->cpt_dir = tuple->dst.dir;
+}
+
+/* Fill image record v from expectation exp; always returns 0. */
+static int dump_one_expect(struct cpt_ip_connexpect_image *v,
+			   struct ip_conntrack_expect *exp,
+			   int sibling, cpt_context_t *ctx)
+{
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_NET_CONNTRACK_EXPECT;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	encode_tuple(&v->cpt_tuple, &exp->tuple);
+	encode_tuple(&v->cpt_mask, &exp->mask);
+	v->cpt_sibling_conntrack = sibling;
+	v->cpt_flags = exp->flags;
+	v->cpt_seq = exp->id;
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+	v->cpt_manip_proto = exp->saved_proto.all;
+	v->cpt_dir = exp->dir;
+#else
+	v->cpt_manip_proto = 0;
+	v->cpt_dir = 0;
+#endif
+	/* The remaining time is stored only if the helper has a timeout. */
+	v->cpt_timeout = exp->master->helper->timeout ?
+			 exp->timeout.expires - jiffies : 0;
+	return 0;
+}
+
+/* NOTE. We use one page to dump the list of expectations. This may not be
+ * enough in theory. In practice there is only one expectation per conntrack
+ * record. Moreover, since _ALL_ expectations are kept in one global list
+ * that is walked for each incoming/outgoing packet, the system would be
+ * severely dead if even one conntrack had that many expectations.
+ * In short, I am not going to repair this.
+ */
+
+static int dump_expect_list(struct ip_conntrack *ct, struct ct_holder *list,
+			    cpt_context_t *ctx)
+{	/* Dumps the expectations of ct; returns 0, -ENOBUFS, -ENOMEM or -EINVAL. */
+	int err = 0;
+	unsigned long pg;
+	struct cpt_ip_connexpect_image *v;
+	struct ip_conntrack_expect *exp;
+
+	if (ct->expecting == 0)
+		return err;	/* nothing expected, nothing to write */
+	if (ct->expecting*sizeof(struct cpt_ip_connexpect_image) > PAGE_SIZE)
+		return -ENOBUFS;	/* the records would not fit into one page */
+
+	pg = __get_free_page(GFP_KERNEL);	/* staging buffer for the records */
+	if (!pg)
+		return -ENOMEM;
+	v = (struct cpt_ip_connexpect_image *)pg;
+
+	read_lock_bh(&ip_conntrack_lock);
+	list_for_each_entry(exp, &ve_ip_conntrack_expect_list, list) {
+		int sibling;
+
+		if (exp->master != ct)
+			continue;
+
+		if (ct->helper == NULL) {
+			eprintk_ctx("conntrack: no helper and non-trivial expectation\n");
+			err = -EINVAL;
+			break;
+		}
+
+		sibling = 0;	/* the lookup below is compiled out, so this stays 0 */
+#if 0
+		/* That's all? No need to calculate sibling? */
+		if (exp->sibling) {
+			struct ct_holder *c;
+			for (c = list; c; c = c->next) {
+				if (tuplehash_to_ctrack(c->cth) == exp->sibling) {
+					sibling = c->index;
+					break;
+				}
+			}
+			/* NOTE: exp->sibling could be not "confirmed" and, hence,
+			 * out of hash table. We should just ignore such a sibling,
+			 * the connection is going to be retried, the packet
+			 * apparently was lost somewhere.
+			 */
+			if (sibling == 0)
+				dprintk_ctx("sibling conntrack is not found\n");
+		}
+#endif
+
+		/* If the expectation still does not have exp->sibling
+		 * and timer is not running, it is about to die on another
+		 * cpu. Skip it. */
+		if (!sibling &&
+		    ct->helper->timeout &&
+		    !timer_pending(&exp->timeout)) {
+			dprintk_ctx("conntrack: expectation: no timer\n");
+			continue;
+		}
+
+		err = dump_one_expect(v, exp, sibling, ctx);
+		if (err)
+			break;
+
+		v++;	/* NOTE(review): no bound check here; relies on the ct->expecting test above */
+	}
+	read_unlock_bh(&ip_conntrack_lock);
+
+	if (err == 0 && (unsigned long)v != pg)
+		ctx->write((void*)pg, (unsigned long)v - pg, ctx);	/* all records in one write */
+
+	free_page(pg);
+	return err;
+}
+
+/* Write one conntrack image record for c, followed by the expectations of
+ * that conntrack. Returns 0, -EINVAL on a ct->proto size mismatch, or the
+ * error returned by dump_expect_list(). */
+static int dump_one_ct(struct ct_holder *c, struct ct_holder *list,
+		       cpt_context_t *ctx)
+{
+	struct ip_conntrack_tuple_hash *h = c->cth;
+	struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+	struct cpt_ip_conntrack_image v;
+	int err;
+
+	if (sizeof(v.cpt_proto_data) != sizeof(ct->proto)) {
+		eprintk_ctx("conntrack module ct->proto version mismatch\n");
+		return -EINVAL;
+	}
+
+	cpt_open_object(NULL, ctx);
+
+	/* Zero the whole record: fields not assigned below (e.g. cpt_nat_seq
+	 * without CONFIG_IP_NF_NAT_NEEDED) must not carry stack garbage. */
+	memset(&v, 0, sizeof(v));
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_CONNTRACK;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	read_lock_bh(&ip_conntrack_lock);
+	v.cpt_status = ct->status;
+	v.cpt_timeout = ct->timeout.expires - jiffies;
+	v.cpt_ct_helper = (ct->helper != NULL);
+	v.cpt_index = c->index;
+	v.cpt_id = ct->id;
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	v.cpt_mark = ct->mark;
+#endif
+	encode_tuple(&v.cpt_tuple[0], &ct->tuplehash[0].tuple);
+	encode_tuple(&v.cpt_tuple[1], &ct->tuplehash[1].tuple);
+	memcpy(&v.cpt_proto_data, &ct->proto, sizeof(v.cpt_proto_data));
+	memcpy(&v.cpt_help_data, &ct->help, sizeof(v.cpt_help_data));
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+	v.cpt_masq_index = ct->nat.masq_index;
+#endif
+	/* "help" data is used by pptp, difficult to support */
+	v.cpt_nat_seq[0].cpt_correction_pos = ct->nat.info.seq[0].correction_pos;
+	v.cpt_nat_seq[0].cpt_offset_before = ct->nat.info.seq[0].offset_before;
+	v.cpt_nat_seq[0].cpt_offset_after = ct->nat.info.seq[0].offset_after;
+	v.cpt_nat_seq[1].cpt_correction_pos = ct->nat.info.seq[1].correction_pos;
+	v.cpt_nat_seq[1].cpt_offset_before = ct->nat.info.seq[1].offset_before;
+	v.cpt_nat_seq[1].cpt_offset_after = ct->nat.info.seq[1].offset_after;
+#endif
+	read_unlock_bh(&ip_conntrack_lock);
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	err = dump_expect_list(ct, list, ctx);
+
+	cpt_close_object(ctx);
+	return err;
+}
+
+int cpt_dump_ip_conntrack(cpt_context_t * ctx)
+{	/* Dump the conntracks of the current VE into the CPT_SECT_NET_CONNTRACK section. */
+	struct ct_holder *ct_list = NULL;
+	struct ct_holder *c, **cp;
+	int err = 0;
+	int index = 0;
+	int idx;
+
+	if (get_exec_env()->_ip_conntrack == NULL)
+		return 0;	/* this VE has no _ip_conntrack: nothing to dump */
+
+	for (idx = atomic_read(&(get_exec_env()->_ip_conntrack->_ip_conntrack_count)); idx >= 0; idx--) {
+		c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL);	/* count + 1 holders, before locking */
+		if (c == NULL) {
+			err = -ENOMEM;
+			goto done;
+		}
+		memset(c, 0, sizeof(struct ct_holder));
+		c->next = ct_list;
+		ct_list = c;
+	}
+
+	c = ct_list;	/* next free holder while walking the hash table */
+
+	read_lock_bh(&ip_conntrack_lock);
+	for (idx = 0; idx < ip_conntrack_htable_size; idx++) {
+		struct ip_conntrack_tuple_hash *h;
+		list_for_each_entry(h, &ve_ip_conntrack_hash[idx], list) {
+			/* Skip reply tuples, they are covered by original
+			 * direction. */
+			if (DIRECTION(h))
+				continue;
+
+			/* Oops, we have not enough of holders...
+			 * It is impossible. */
+			if (unlikely(c == NULL)) {
+				read_unlock_bh(&ip_conntrack_lock);
+				eprintk_ctx("unexpected conntrack appeared\n");
+				err = -ENOMEM;
+				goto done;
+			}
+
+			/* If timer is not running, it means that it
+			 * has just been scheduled on another cpu.
+			 * We should skip this conntrack, it is about to be
+			 * destroyed. */
+			if (!del_timer(&tuplehash_to_ctrack(h)->timeout)) {
+				dprintk_ctx("conntrack: no timer\n");
+				continue;
+			}
+
+			/* Timer is deleted. refcnt is _not_ decreased.
+			 * We are going to restore the timer on exit
+			 * from this function. */
+			c->cth = h;
+			c->index = ++index;
+			c = c->next;
+		}
+	}
+	read_unlock_bh(&ip_conntrack_lock);
+
+	/* No conntracks? Good. */
+	if (index == 0)
+		goto done;
+
+	/* Comb the list a little. */
+	cp = &ct_list;
+	while ((c = *cp) != NULL) {
+		/* Discard unused entries; they can appear, if some
+		 * entries were timed out since we preallocated the list.
+		 */
+		if (c->cth == NULL) {
+			*cp = c->next;
+			kfree(c);
+			continue;
+		}
+
+		/* Move conntracks attached to expectations to the beginning
+		 * of the list. */
+		if (tuplehash_to_ctrack(c->cth)->master && c != ct_list) {
+			*cp = c->next;
+			c->next = ct_list;
+			ct_list = c;
+			dprintk_ctx("conntrack: %d moved in list\n", c->index);
+			continue;
+		}
+		cp = &c->next;
+	}
+
+	cpt_open_section(ctx, CPT_SECT_NET_CONNTRACK);
+
+	for (c = ct_list; c; c = c->next) {
+		err = dump_one_ct(c, ct_list, ctx);
+		if (err)
+			goto done;	/* NOTE(review): the section opened above is not closed on this path - confirm */
+	}
+
+	cpt_close_section(ctx);
+
+done:
+	while ((c = ct_list) != NULL) {
+		ct_list = c->next;
+		if (c->cth) {
+			/* Restore timer. refcnt is preserved. */
+			add_timer(&tuplehash_to_ctrack(c->cth)->timeout);
+		}
+		kfree(c);
+	}
+	return err;
+}
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_context.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_context.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_context.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_context.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,258 @@
+/*
+ *
+ *  kernel/cpt/cpt_context.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+
+/* Append count bytes at addr to the dump file; the first error sticks in write_error. */
+static void file_write(const void *addr, size_t count, struct cpt_context *ctx)
+{
+	struct file *file = ctx->file;
+	mm_segment_t oldfs = get_fs();
+	ssize_t err = -EBADF;	/* reported when there is no file or no ->write */
+	set_fs(KERNEL_DS);	/* addr points into kernel memory */
+	if (file && file->f_op && file->f_op->write)
+		err = file->f_op->write(file, addr, count, &file->f_pos);
+	set_fs(oldfs);
+	if (err != count && !ctx->write_error)	/* a short write becomes -EIO */
+		ctx->write_error = err < 0 ? err : -EIO;
+}
+
+/* Write count bytes from addr at offset pos; a local copy of pos is used, not file->f_pos. */
+static void file_pwrite(void *addr, size_t count, struct cpt_context *ctx, loff_t pos)
+{
+	struct file *file = ctx->file;
+	mm_segment_t oldfs = get_fs();
+	ssize_t err = -EBADF;	/* reported when there is no file or no ->write */
+	set_fs(KERNEL_DS);	/* addr points into kernel memory */
+	if (file && file->f_op && file->f_op->write)
+		err = file->f_op->write(file, addr, count, &pos);
+	set_fs(oldfs);
+	if (err != count && !ctx->write_error)	/* a short write becomes -EIO */
+		ctx->write_error = err < 0 ? err : -EIO;
+}
+
+/* Move the dump file position to CPT_ALIGN(f_pos); a context without a file is left alone. */
+static void file_align(struct cpt_context *ctx)
+{
+	if (ctx->file == NULL)
+		return;
+	ctx->file->f_pos = CPT_ALIGN(ctx->file->f_pos);
+}
+
+/* Reset *ctx and prepare it for dumping: one reference, no open section or object. */
+void cpt_context_init(struct cpt_context *ctx)
+{
+	int sect;
+
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->refcount = 1;
+	init_MUTEX(&ctx->main_sem);
+	/* -1 means "nothing open"; cpt_close_section()/cpt_close_object() test for >= 0. */
+	ctx->current_object = -1;
+	ctx->current_section = -1;
+	for (sect = 0; sect < CPT_SECT_MAX; sect++)
+		ctx->sections[sect] = CPT_NULL;
+	ctx->write = file_write;
+	ctx->pwrite = file_pwrite;
+	ctx->align = file_align;
+	ctx->pagesize = PAGE_SIZE;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	init_completion(&ctx->pgin_notify);
+#endif
+	cpt_object_init(ctx);
+}
+
+/* Allocate the one-page scratch buffer and stamp it as free. Returns 0 or -ENOMEM. */
+int cpt_open_dumpfile(struct cpt_context *ctx)
+{
+	ctx->tmpbuf = (char *)__get_free_page(GFP_KERNEL);
+	if (ctx->tmpbuf != NULL)
+		__cpt_release_buf(ctx);	/* writes the FREE marker into the page */
+	return ctx->tmpbuf ? 0 : -ENOMEM;
+}
+
+/* Release the dump file and the scratch page, then report any write error seen so far. */
+int cpt_close_dumpfile(struct cpt_context *ctx)
+{
+	int err = ctx->write_error;
+	if (ctx->file != NULL)
+		fput(ctx->file);
+	ctx->file = NULL;
+	if (ctx->tmpbuf != NULL)
+		free_page((unsigned long)ctx->tmpbuf);
+	ctx->tmpbuf = NULL;
+	if (err)
+		eprintk_ctx("error while writing dump file: %d\n", ctx->write_error);
+	return err;
+}
+
+/* Emit the image's major header at the current write position; does nothing without a file. */
+int cpt_major_hdr_out(struct cpt_context *ctx)
+{
+	struct cpt_major_hdr hdr;
+
+	if (!ctx->file)
+		return 0;
+	memset(&hdr, 0, sizeof(hdr));	/* whatever is not set below is written as zero */
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_image_version = CPT_VERSION_18;
+	hdr.cpt_signature[0] = CPT_SIGNATURE0;
+	hdr.cpt_signature[1] = CPT_SIGNATURE1;
+	hdr.cpt_signature[2] = CPT_SIGNATURE2;
+	hdr.cpt_signature[3] = CPT_SIGNATURE3;
+	/* Machine description: architecture, page size, tick rate. */
+#ifdef CONFIG_X86_64
+	hdr.cpt_os_arch = CPT_OS_ARCH_EMT64;
+#elif defined(CONFIG_X86_32)
+	hdr.cpt_os_arch = CPT_OS_ARCH_I386;
+#elif defined(CONFIG_IA64)
+	hdr.cpt_os_arch = CPT_OS_ARCH_IA64;
+#else
+#error	Arch is not supported
+#endif
+	hdr.cpt_pagesize = (__u16)PAGE_SIZE;
+	hdr.cpt_hz = HZ;
+	hdr.cpt_cpu_caps[0] = ctx->src_cpu_flags;
+	hdr.cpt_kernel_config[0] = ctx->kernel_config_flags;
+	hdr.cpt_iptables_mask = ctx->iptables_mask;
+	hdr.cpt_ve_features = (__u32)ctx->features;	/* low 32 bits */
+	hdr.cpt_ve_features2 = (__u32)(ctx->features>>32);	/* high 32 bits */
+	hdr.cpt_start_jiffies64 = ctx->virt_jiffies64;
+	hdr.cpt_start_sec = ctx->start_time.tv_sec;
+	hdr.cpt_start_nsec = ctx->start_time.tv_nsec;
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	return 0;
+}
+
+int cpt_close_section(struct cpt_context *ctx)	/* finish the currently open section, if any */
+{
+	if (ctx->file != NULL && ctx->current_section >= 0) {
+		__u64 len = ctx->file->f_pos - ctx->current_section;	/* bytes since its start */
+		ctx->pwrite(&len, sizeof(len), ctx, ctx->current_section);	/* back-patch */
+		ctx->current_section = -1;	/* nothing open any more */
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cpt_close_section);
+
+/* Close any open section, then start section `type` here. Returns 0, or -EINVAL for a bad type. */
+int cpt_open_section(struct cpt_context *ctx, __u32 type)
+{
+	struct cpt_section_hdr hdr;
+
+	if (ctx->file == NULL)
+		return 0;
+	if (type >= CPT_SECT_MAX)	/* would index past ctx->sections[] */
+		return -EINVAL;
+	cpt_close_section(ctx);
+	/* Remember the start offset: cpt_dump_tail() copies it, cpt_close_section() patches there. */
+	ctx->current_section = ctx->file->f_pos;
+	ctx->sections[type] = ctx->current_section;
+	hdr.cpt_next = 0;	/* cpt_close_section() later rewrites the header's first 8 bytes */
+	hdr.cpt_section = type;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_align = 0;
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	return 0;
+}
+EXPORT_SYMBOL(cpt_open_section);
+
+
+int cpt_close_object(struct cpt_context *ctx)	/* finish the currently open object, if any */
+{
+	if (ctx->file != NULL && ctx->current_object >= 0) {
+		__u64 len = ctx->file->f_pos - ctx->current_object;	/* bytes since its start */
+		ctx->pwrite(&len, sizeof(len), ctx, ctx->current_object);	/* back-patch */
+		ctx->current_object = -1;	/* nothing open any more */
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cpt_close_object);
+
+/* Begin a new object at the current file position, closing any object still open. */
+int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	if (ctx->file != NULL) {
+		cpt_close_object(ctx);
+		ctx->current_object = ctx->file->f_pos;
+		/* obj is optional; when given, it is told where its image starts. */
+		if (obj != NULL)
+			cpt_obj_setpos(obj, ctx->current_object, ctx);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(cpt_open_object);
+
+int cpt_push_object(loff_t *saved, struct cpt_context *ctx)
+{
+	/* Always fill *saved: cpt_pop_object() reads it even when there is no file. */
+	*saved = ctx->current_object;
+	if (ctx->file)
+		ctx->current_object = ctx->file->f_pos;	/* nested object starts here */
+	return 0;
+}
+EXPORT_SYMBOL(cpt_push_object);
+
+int cpt_pop_object(loff_t *saved, struct cpt_context *ctx)
+{
+	ctx->current_object = *saved;	/* restore the offset cpt_push_object() stored in *saved */
+	return 0;	/* always 0 */
+}
+EXPORT_SYMBOL(cpt_pop_object);
+
+/* Write the trailer section: signature plus the table of section offsets. No-op without a file. */
+int cpt_dump_tail(struct cpt_context *ctx)
+{
+	struct cpt_major_tail tail;
+	int sect;
+
+	if (!ctx->file)
+		return 0;
+	cpt_open_section(ctx, CPT_SECT_TRAILER);
+
+	memset(&tail, 0, sizeof(tail));
+	tail.cpt_object = CPT_OBJ_TRAILER;
+	tail.cpt_content = CPT_CONTENT_VOID;
+	tail.cpt_hdrlen = sizeof(tail);
+	tail.cpt_next = sizeof(tail);
+	tail.cpt_signature[0] = CPT_SIGNATURE0;
+	tail.cpt_signature[1] = CPT_SIGNATURE1;
+	tail.cpt_signature[2] = CPT_SIGNATURE2;
+	tail.cpt_signature[3] = CPT_SIGNATURE3;
+	tail.cpt_64bit = ctx->tasks64;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	tail.cpt_lazypages = ctx->lazypages;	/* otherwise stays 0 from the memset */
+#endif
+	tail.cpt_nsect = CPT_SECT_MAX_INDEX;
+	for (sect = 0; sect < CPT_SECT_MAX_INDEX; sect++)
+		tail.cpt_sections[sect] = ctx->sections[sect];
+	ctx->write(&tail, sizeof(tail), ctx);
+
+	cpt_close_section(ctx);
+	return 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_context.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_context.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_context.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_context.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,210 @@
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+#include <ub/beancounter.h>
+
+#define	CPT_CTX_ERROR		-1
+#define	CPT_CTX_IDLE		0
+#define CPT_CTX_SUSPENDING	1
+#define	CPT_CTX_SUSPENDED	2
+#define CPT_CTX_DUMPING		3
+#define CPT_CTX_UNDUMPING	4
+#define CPT_CTX_UNDUMPED	5
+
+#define CPT_TID(tsk)   (tsk)->pid, virt_pid(tsk), (tsk)->comm
+#define CPT_FID		"%d,%d(%s)"
+
+
+typedef struct cpt_context
+{
+	struct list_head ctx_list;
+	int	refcount;	/* set to 1 by cpt_context_init() */
+	int	ctx_state;	/* presumably one of the CPT_CTX_* values above — confirm */
+	int	objcount;
+	int	sticky;
+	struct semaphore main_sem;
+
+	struct file *errorfile;	/* cpt_flush_error() writes error_msg here */
+	struct file *statusfile;
+	struct file *lockfile;
+
+	int	errno;
+	char	*error_msg;	/* text accumulated by eprintk_ctx(), treated as PAGE_SIZE bytes */
+	loff_t	err_offset;	/* bytes of error_msg in use */
+
+	struct file	*file;	/* dump file; NULL turns the dump helpers into no-ops */
+	char		*tmpbuf;	/* one scratch page; its last 4 bytes hold CPT_TMPBUF_FREE/BUSY */
+	int		pagesize;
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+	int		iter_done;
+	void		*iter_dir;
+	struct user_beancounter *iter_ub;
+#endif
+	loff_t		current_section;	/* file offset of the open section, -1 if none */
+	loff_t		current_object;	/* file offset of the open object, -1 if none */
+
+	loff_t		sections[CPT_SECT_MAX];	/* start offset per section type, CPT_NULL initially */
+
+	__u32		errormask;
+	__u32		write_error;	/* first error recorded by file_write()/file_pwrite() */
+
+	struct list_head object_array[CPT_OBJ_MAX];	/* one list per object type */
+
+	void		(*write)(const void *addr, size_t count, struct cpt_context *ctx);
+	void		(*pwrite)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos);
+	ssize_t		(*read)(void *addr, size_t count, struct cpt_context *ctx);
+	ssize_t		(*pread)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos);
+	void		(*align)(struct cpt_context *ctx);
+	int		ve_id;
+	int		contextid;
+	struct timespec cpt_monotonic_time; /* Host monotonic time at the moment of cpt/rst
+					     * corresponding to start_time */
+	__u64		virt_jiffies64;	/* Virtual jiffies64. It is == cpt_jiffies64 when
+					 * VE did not migrate. */
+	struct timespec	start_time;
+	struct timespec delta_time;
+	__s64		delta_nsec;
+	int		image_version;
+	__u16		image_arch;
+	__u64		iptables_mask;
+	__u64		features;
+
+#define CPT_ANONVMA_HBITS (sizeof(void*) == 4 ? 10 : 9)
+#define CPT_ANONVMA_HSIZE (1<<CPT_ANONVMA_HBITS)
+	struct hlist_head *anonvmas;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	struct file	*pagein_file_in;
+	struct file	*pagein_file_out;
+	int		lazy_vm;
+	int		lazypages;
+	int		lazytype;
+	struct task_struct	*pgin_task;
+	unsigned long	last_pagein;
+	struct pagein_desc	**pgin_dir;
+	struct pgin_device	*pagein_dev;
+	struct completion	pgin_notify;
+	struct completion	*pgind_completion;
+	struct swap_info_struct	*pgin_swp;
+#endif
+	int		tasks64;
+	__u32		src_cpu_flags;
+	__u32		dst_cpu_flags;
+	__u32		kernel_config_flags;
+
+	__u32		last_vpid;
+
+	struct filejob  *filejob_queue;
+
+	int		slm_count;
+
+#ifdef CONFIG_USER_RESOURCE
+	/* Store here ubc limits and barriers during undumping,
+	   and restore them before resuming */
+	struct ubparm	saved_ubc[UB_RESOURCES];
+#endif
+} cpt_context_t;
+
+typedef struct {
+	int pid;
+	cpt_context_t *ctx;
+	struct completion done;
+} pagein_info_t;
+
+int pagein_info_printf(char *buf, cpt_context_t *ctx);
+
+int cpt_open_dumpfile(struct cpt_context *);
+int cpt_close_dumpfile(struct cpt_context *);
+int rst_open_dumpfile(struct cpt_context *);
+void rst_close_dumpfile(struct cpt_context *);
+void cpt_context_init(struct cpt_context *);
+void rst_context_init(struct cpt_context *);
+void cpt_context_destroy(struct cpt_context *);
+
+void rst_report_error(int err, cpt_context_t *ctx);
+
+
+int cpt_major_hdr_out(struct cpt_context *ctx);
+int cpt_dump_tail(struct cpt_context *ctx);
+int cpt_close_section(struct cpt_context *ctx);
+int cpt_open_section(struct cpt_context *ctx, __u32 type);
+int cpt_close_object(struct cpt_context *ctx);
+int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx);
+int cpt_push_object(loff_t *saved, struct cpt_context *ctx);
+int cpt_pop_object(loff_t *saved, struct cpt_context *ctx);
+
+int rst_get_section(int type, struct cpt_context * ctx, loff_t *, loff_t *);
+__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx);
+__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx);
+void rst_put_name(__u8 *name, struct cpt_context *ctx);
+int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx);
+void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx);
+
+#define rst_get_object(type, pos, tmp, ctx) \
+ _rst_get_object((type), (pos), (tmp), sizeof(*(tmp)), (ctx))
+
+extern int debug_level;
+
+#define cpt_printk(lvl, fmt, args...)	do {	\
+		if (lvl <= debug_level)		\
+			printk(fmt, ##args);	\
+	} while (0)
+
+#define dprintk(a...) cpt_printk(3, "CPT DBG: " a)
+#define dprintk_ctx(f, arg...) dprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg)
+
+#define wprintk(a...) cpt_printk(2, "CPT WRN: " a)
+#define wprintk_ctx(f, arg...) wprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg)
+
+#define eprintk(a...) cpt_printk(1, "CPT ERR: " a)
+#define eprintk_ctx(f, arg...)						\
+do {									\
+	eprintk("%p,%u :" f, ctx, ctx->ve_id, ##arg);			\
+	/* scnprintf: err_offset must count only bytes really stored */	\
+	if (ctx->error_msg && ctx->err_offset < PAGE_SIZE)		\
+		ctx->err_offset += scnprintf(ctx->error_msg + ctx->err_offset, \
+				PAGE_SIZE - ctx->err_offset, f, ##arg);	\
+} while(0)
+
+#define CPT_TMPBUF_FREE 0x789adf12
+#define CPT_TMPBUF_BUSY 0xabcd9876
+
+/* Claim the scratch page; BUGs unless its guard word currently reads CPT_TMPBUF_FREE. */
+static inline void *cpt_get_buf(cpt_context_t *ctx)
+{
+	u32 *guard = (u32 *)(ctx->tmpbuf + PAGE_SIZE - 4);	/* last 4 bytes of the page */
+	BUG_ON(*guard != CPT_TMPBUF_FREE);
+	*guard = CPT_TMPBUF_BUSY;
+	return ctx->tmpbuf;
+}
+
+/* Unchecked release: stamp the page's guard word FREE whatever it held before. */
+static inline void __cpt_release_buf(cpt_context_t *ctx)
+{
+	u32 *guard = (u32 *)(ctx->tmpbuf + PAGE_SIZE - 4);
+	*guard = CPT_TMPBUF_FREE;
+}
+
+/* Checked release: BUGs unless the guard word reads CPT_TMPBUF_BUSY, then marks it FREE. */
+static inline void cpt_release_buf(cpt_context_t *ctx)
+{
+	u32 *guard = (u32 *)(ctx->tmpbuf + PAGE_SIZE - 4);
+	BUG_ON(*guard != CPT_TMPBUF_BUSY);
+	*guard = CPT_TMPBUF_FREE;
+}
+
+/* Push the accumulated error text to ctx->errorfile (if it can be written) and reset it. */
+static inline void cpt_flush_error(cpt_context_t *ctx)
+{
+	struct file *out = ctx->errorfile;
+	mm_segment_t saved_fs;
+	if (!out || !ctx->error_msg || !ctx->err_offset)
+		return;
+	if (out->f_op && out->f_op->write) {
+		saved_fs = get_fs();
+		set_fs(KERNEL_DS);	/* error_msg is a kernel buffer */
+		out->f_op->write(out, ctx->error_msg, ctx->err_offset, &out->f_pos);
+		set_fs(saved_fs);
+	}
+	/* The text is dropped even when nothing could be written. */
+	ctx->error_msg[0] = 0;
+	ctx->err_offset = 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_dump.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_dump.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_dump.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_dump.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,1103 @@
+/*
+ *
+ *  kernel/cpt/cpt_dump.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/ptrace.h>
+#include <linux/smp_lock.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <ub/ub_task.h>
+#include <linux/cpt_image.h>
+#include <linux/namespace.h>
+#include <linux/netdevice.h>
+#include <linux/nfcalls.h>
+#include <linux/dcache.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_process.h"
+#include "cpt_net.h"
+#include "cpt_socket.h"
+#include "cpt_ubc.h"
+#include "cpt_kernel.h"
+
+
+/* Count real_parent hops from c up to root; -1 if the walk leaves c's VE before reaching it. */
+static int vps_child_level(task_t *root, task_t *c)
+{
+	const int veid = VE_TASK_INFO(c)->owner_env->veid;
+	int depth;
+
+	for (depth = 0; VE_TASK_INFO(c)->owner_env->veid == veid; depth++) {
+		/* A non-leader thread is represented by its group leader. */
+		if (c->pid != c->tgid)
+			c = c->group_leader;
+		if (c == root)
+			return depth;
+		c = c->real_parent;
+	}
+	return -1;
+}
+
+/*
+ * Should vps_stop_tasks() try to freeze p?  Returns 0 for a task with a
+ * nonzero exit_state or whose ->state equals one of the four values tested
+ * below, 1 otherwise.
+ */
+static inline int freezable(struct task_struct * p)
+{
+	long state;
+	if (p->exit_state)
+		return 0;
+	state = p->state;
+	if (state == EXIT_ZOMBIE || state == EXIT_DEAD)
+		return 0;
+	if (state == TASK_STOPPED || state == TASK_TRACED)
+		return 0;
+	return 1;
+}
+
+/* For every thread visited by do_each_thread_ve(): clear PF_FROZEN under siglock and wake it. */
+static void wake_ve(cpt_context_t *ctx)
+{
+	task_t *leader, *thread;
+	do_each_thread_ve(leader, thread) {
+		spin_lock_irq(&thread->sighand->siglock);
+		if (thread->flags & PF_FROZEN) {
+			thread->flags &= ~PF_FROZEN;
+			wake_up_process(thread);
+		}
+		spin_unlock_irq(&thread->sighand->siglock);
+	} while_each_thread_ve(leader, thread);
+}
+
+/*
+ * Some comment is necessary about PF_FREEZE, PF_FROZEN, TIF_FREEZE...
+ *
+ * SWSUSP uses the PF_FREEZE flag in tsk->flags, raising it in the context
+ * of another process. Apparently, this is unacceptable on SMP.
+ * Take freeze_processes() in kernel/power/process.c as an example.
+ * Unserialized modification of tsk->flags easily
+ * (believe it or not, it happens with a probability of almost 100% :-))
+ * creates a situation where setting PF_FREEZE in freeze_processes(),
+ * which quickly spins raising PF_FREEZE on all the processes,
+ * _clears_ the PF_FROZEN just set in refrigerator(), so that suspend deadlocks.
+ *
+ * So, to make things clean, we require that those flags be modified
+ * only under tsk->sighand->siglock, which is quite natural because PF_FREEZE
+ * is just a kind of signal.
+ *
+ * That is not enough, because we are still not allowed to change tsk->flags
+ * in the context of another process: we could corrupt other flags while the
+ * process, running on another cpu, modifies them. So we use TIF_FREEZE in the
+ * thread flags, which can be changed atomically.
+ *
+ * PF_FROZEN is also changed in the context of another process, but this happens
+ * only when the process is already in refrigerator(), which does not modify
+ * tsk->flags.
+ */
+
+enum
+{
+	OBSTACLE_NOGO = -1,	/* suspend impossible: vps_stop_tasks() returns -EAGAIN */
+	OBSTACLE_TIMEOUT = -2,	/* overall timeout or pending signal: returns -EINTR */
+	OBSTACLE_TRYAGAIN = -3,	/* VE is woken and the freeze is retried after a pause */
+};
+
+#define SUSPEND_TIMEOUT	(10UL*HZ)
+/* Freeze every task of the current VE. Returns 0, -ESRCH, -EAGAIN or -EINTR (see the switch below). */
+static int vps_stop_tasks(struct cpt_context *ctx)
+{
+	unsigned long start_time = jiffies;
+	unsigned long target, timeout;
+	task_t *p, *g;
+	int todo;	/* >0: tasks still running; 0: all frozen; <0: an OBSTACLE_* code */
+	int round = 0;
+	/* Record wall-clock, monotonic and virtual-jiffies timestamps for this attempt. */
+	do_gettimespec(&ctx->start_time);
+	do_posix_clock_monotonic_gettime(&ctx->cpt_monotonic_time);
+	ctx->virt_jiffies64 = get_jiffies_64() + get_exec_env()->jiffies_fixup;
+
+	read_lock(&tasklist_lock);
+
+	atomic_inc(&get_exec_env()->suspend);
+	timeout = HZ/5;	/* first "minor" timeout; doubled on each retry */
+	target = jiffies + timeout;
+
+	for(;;) {
+		task_t *root;
+		todo = 0;
+
+		root = find_task_by_pid_ve(1);
+		if (!root) {
+			read_unlock(&tasklist_lock);
+			eprintk_ctx("cannot find ve init\n");
+			atomic_dec(&get_exec_env()->suspend);
+			return -ESRCH;	/* NOTE(review): no wake_ve() here, unlike "out" — confirm nothing is frozen yet */
+		}
+
+		do_each_thread_ve(g, p) {
+			if (vps_child_level(root, p) >= 0) {
+				if (!is_virtual_pid(virt_pid(p))) {
+					eprintk_ctx("external process %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n",
+							virt_pid(p), p->pid, p->comm);
+					todo = OBSTACLE_NOGO;
+					goto out;
+				}
+				if (!is_virtual_pid(virt_pgid(p))) {
+					eprintk_ctx("external process group %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n",
+							virt_pgid(p), p->pid, p->comm);
+					todo = OBSTACLE_NOGO;
+					goto out;
+				}
+				if (!is_virtual_pid(virt_sid(p))) {
+					eprintk_ctx("external process session %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n",
+							virt_sid(p), p->pid, p->comm);
+					todo = OBSTACLE_NOGO;
+					goto out;
+				}
+				if (p->vfork_done) {
+					/* Task between vfork()...exec()
+					 * cannot be frozen, because parent
+					 * wait in uninterruptible state.
+					 * So, we do nothing, waiting for
+					 * exec(), unless:
+					 */
+					if (p->state == TASK_STOPPED ||
+					    p->state == TASK_TRACED) {
+						eprintk_ctx("task " CPT_FID " is stopped while vfork(). "
+								"Checkpointing is impossible.\n",
+								CPT_TID(p));
+						todo = OBSTACLE_NOGO;
+						/* It is fatal, _user_ stopped
+						 * vfork()ing task, so that we
+						 * cannot suspend now.
+						 */
+					} else {
+						todo = OBSTACLE_TRYAGAIN;
+					}
+					goto out;
+				}
+				if (p->signal->group_exit_task &&
+				    p->signal->notify_count) {
+					/* exec() waits for threads' death */
+					wprintk_ctx("task " CPT_FID " waits for threads' death\n", CPT_TID(p));
+					todo = OBSTACLE_TRYAGAIN;
+					goto out;
+				}
+				if (p->state == TASK_TRACED
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
+				    && !p->stopped_state
+#endif
+				    ) {
+					int ptrace_id = p->pn_state;
+					/* Debugger waits for signal. */
+					switch (ptrace_id) {
+					case PN_STOP_TF:
+					case PN_STOP_TF_RT:
+					case PN_STOP_ENTRY:
+					case PN_STOP_FORK:
+					case PN_STOP_VFORK:
+					case PN_STOP_SIGNAL:
+					case PN_STOP_EXIT:
+					case PN_STOP_LEAVE:
+						break;
+					default:
+						eprintk_ctx("task " CPT_FID " is stopped by debugger while %d.\n", CPT_TID(p), ptrace_id);
+						todo = OBSTACLE_NOGO;
+						goto out;
+					}
+				}
+				if (p->flags & PF_NOFREEZE) {
+					eprintk_ctx("task " CPT_FID " is unfreezable. Checkpointing is impossible.\n", CPT_TID(p));
+					todo = OBSTACLE_NOGO;
+					goto out;
+				}
+
+				if (!freezable(p))
+					continue;
+				/* Request a freeze: TIF_FREEZE is set under siglock and the task is kicked. */
+				spin_lock_irq(&p->sighand->siglock);
+				if (!(p->flags & PF_FROZEN)) {
+					set_tsk_thread_flag(p, TIF_FREEZE);
+					signal_wake_up(p, 0);
+				}
+				spin_unlock_irq(&p->sighand->siglock);
+
+				if (p->flags & PF_FROZEN) {
+					if (p->state != TASK_UNINTERRUPTIBLE)
+						printk("Holy Crap 1 %ld " CPT_FID "\n", p->state, CPT_TID(p));
+					continue;
+				}
+
+				if (round == 10)
+					wprintk_ctx(CPT_FID " is running\n", CPT_TID(p));
+				/* Not frozen yet: count it so that another pass is made. */
+				todo++;
+			} else {
+				if (p != current) {
+					eprintk_ctx("foreign process %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n",
+							virt_pid(p), p->pid, p->comm);
+					todo = OBSTACLE_NOGO;
+					goto out;
+				}
+			}
+		} while_each_thread_ve(g, p);
+
+		if (todo > 0) {
+			/* No visible obstacles, but VE did not freeze
+			 * for timeout. Interrupt suspend, if it is major
+			 * timeout or signal; if it is minor timeout
+			 * we will wake VE and restart suspend.
+			 */
+			if (time_after(jiffies, start_time + SUSPEND_TIMEOUT)
+			    || signal_pending(current))
+				todo = OBSTACLE_TIMEOUT;
+			else if (time_after(jiffies, target))
+				todo = OBSTACLE_TRYAGAIN;
+		}
+
+out:
+		if (todo < 0) {
+			atomic_dec(&get_exec_env()->suspend);
+			/* Any obstacle: thaw whatever was frozen so far (tasklist_lock is still held). */
+			wake_ve(ctx);
+
+#if 0
+			/* This is sign of failure of printk(), which is not
+			 * ours. So, no prefixes. */
+			printk(">\n");
+#endif
+		}
+
+		read_unlock(&tasklist_lock);
+
+		if (!todo) {
+			atomic_dec(&get_exec_env()->suspend);
+			return 0;	/* every task seen in this pass was frozen or skipped */
+		}
+
+		switch (todo) {
+		case OBSTACLE_NOGO:
+			eprintk_ctx("suspend is impossible now.\n");
+			return -EAGAIN;
+
+		case OBSTACLE_TIMEOUT:
+			eprintk_ctx("interrupted or timed out.\n");
+			return -EINTR;
+
+		case OBSTACLE_TRYAGAIN:
+			if (time_after(jiffies, start_time + SUSPEND_TIMEOUT) ||
+			    signal_pending(current)) {
+				wprintk_ctx("suspend timed out\n");
+				return -EAGAIN;
+			}
+
+			wprintk_ctx("minor suspend timeout (%lu) expired, "
+				    "trying again\n", timeout);
+
+			/* Try again. VE is awake, give it some time to run. */
+			current->state = TASK_INTERRUPTIBLE;
+			schedule_timeout(HZ);
+
+			/* After a short wait restart suspend
+			 * with longer timeout */
+			atomic_inc(&get_exec_env()->suspend);	/* re-take the count dropped at "out" */
+			timeout = min(timeout<<1, SUSPEND_TIMEOUT);
+			target = jiffies + timeout;
+			break;
+
+		default:	/* todo > 0: some tasks are still on their way to the refrigerator */
+			if (round > 0) {
+				/* VE is partially frozen, give processes
+				 * a chance to enter to refrigerator(). */
+				current->state = TASK_INTERRUPTIBLE;
+				schedule_timeout(HZ/20);
+			} else {
+				yield();
+			}
+		}
+
+		read_lock(&tasklist_lock);
+		round++;
+	}
+}
+
+/* Clear is_locked on the VE named by ctx->ve_id; returns -ESRCH if no such VE is found. */
+static int cpt_unlock_ve(struct cpt_context *ctx)
+{
+	struct ve_struct *ve = get_ve_by_id(ctx->ve_id);
+
+	if (ve == NULL)
+		return -ESRCH;
+	down_write(&ve->op_sem);	/* the flag is changed with op_sem held for writing */
+	ve->is_locked = 0;
+	up_write(&ve->op_sem);
+	put_ve(ve);	/* pairs with get_ve_by_id() above */
+	return 0;
+}
+
+int cpt_resume(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	/* Let a suspended VE run again: thaw collected tasks, resume network, unlock. Returns 0. */
+	virtinfo_notifier_call(VITYPE_SCP, VIRTINFO_SCP_DMPFIN, ctx);
+
+	cpt_unlock_sockets(ctx);
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if (ctx->pgin_task) {
+		wait_for_completion(&ctx->pgin_notify);
+		put_task_struct(ctx->pgin_task);
+		ctx->pgin_task = NULL;
+	}
+#endif
+	/* Thaw each task on the CPT_OBJ_TASK list and drop the reference held on it. */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+
+		spin_lock_irq(&tsk->sighand->siglock);	/* PF_FROZEN is changed under siglock */
+		if (tsk->flags & PF_FROZEN) {
+			tsk->flags &= ~PF_FROZEN;
+			wake_up_process(tsk);
+		} else if (freezable(tsk)) {
+			eprintk_ctx("strange, %s not frozen\n", tsk->comm );
+		}
+		spin_unlock_irq(&tsk->sighand->siglock);
+		put_task_struct(tsk);
+	}
+
+	cpt_resume_network(ctx);
+
+	cpt_unlock_ve(ctx);	/* return value (-ESRCH if the VE is gone) is ignored */
+
+	cpt_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+	return 0;
+}
+
+int cpt_kill(struct cpt_context *ctx)
+{
+	int err = 0;	/* never changed below: past the two early checks the function returns 0 */
+	struct ve_struct *env;
+	cpt_object_t *obj;
+	task_t *root_task = NULL;
+	long delay;
+	/* SIGKILL every collected task of the VE (its pid-1 task last), then wait for the VE to drain. */
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+
+	/* from here cpt_kill succeeds */
+	virtinfo_notifier_call(VITYPE_SCP, VIRTINFO_SCP_DMPFIN, ctx);
+
+	if (current->ve_task_info.owner_env == env) {
+		wprintk_ctx("attempt to kill ve from inside, escaping...\n");
+		ve_move_task(current, env, get_ve0());
+	}
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if (ctx->pgin_task) {
+		wait_for_completion(&ctx->pgin_notify);
+		put_task_struct(ctx->pgin_task);
+		ctx->pgin_task = NULL;
+	}
+#endif
+
+	cpt_kill_sockets(ctx);
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+
+		if (tsk->exit_state) {
+			put_task_struct(tsk);	/* already exited: just drop the reference */
+			continue;
+		}
+
+		if (virt_pid(tsk) == 1) {
+			root_task = tsk;	/* killed after the loop; its reference is kept until then */
+			continue;
+		}
+
+		tsk->clear_child_tid = NULL;
+		if (tsk->ptrace) {
+			/* Detach from the tracer and re-link the task under its real parent. */
+			write_lock_irq(&tasklist_lock);
+			tsk->ptrace = 0;
+			if (!list_empty(&tsk->ptrace_list)) {
+				list_del_init(&tsk->ptrace_list);
+				REMOVE_LINKS(tsk);
+				tsk->parent = tsk->real_parent;
+				SET_LINKS(tsk);
+			}
+			write_unlock_irq(&tasklist_lock);
+		}
+
+		send_sig(SIGKILL, tsk, 1);
+		/* Block everything except SIGKILL, flag a pending signal and thaw the task. */
+		spin_lock_irq(&tsk->sighand->siglock);
+		sigfillset(&tsk->blocked);
+		sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+		set_tsk_thread_flag(tsk, TIF_SIGPENDING);
+		if (tsk->flags & PF_FROZEN)
+			tsk->flags &= ~PF_FROZEN;
+		spin_unlock_irq(&tsk->sighand->siglock);
+
+		wake_up_process(tsk);
+		put_task_struct(tsk);
+	}
+
+	yield();
+	/* Same treatment for the pid-1 task; here TIF_FREEZE is cleared as well. */
+	if (root_task != NULL) {
+		send_sig(SIGKILL, root_task, 1);
+
+		spin_lock_irq(&root_task->sighand->siglock);
+		sigfillset(&root_task->blocked);
+		sigdelsetmask(&root_task->blocked, sigmask(SIGKILL));
+		set_tsk_thread_flag(root_task, TIF_SIGPENDING);
+		clear_tsk_thread_flag(root_task, TIF_FREEZE);
+		if (root_task->flags & PF_FROZEN)
+			root_task->flags &= ~PF_FROZEN;
+		spin_unlock_irq(&root_task->sighand->siglock);
+
+		wake_up_process(root_task);
+		put_task_struct(root_task);
+	}
+
+	cpt_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+	/* Poll until env->counter reads 1, sleeping up to HZ per step; a signal ends the wait. */
+	delay = 1;
+	while (atomic_read(&env->counter) != 1) {
+		if (signal_pending(current))
+			break;
+		current->state = TASK_INTERRUPTIBLE;
+		delay = (delay < HZ) ? (delay << 1) : HZ;
+		schedule_timeout(delay);
+	}
+	put_ve(env);
+
+	return err;
+}
+
+#ifdef CONFIG_USER_RESOURCE
+/* Hand task t's exec_ub, task_ub and fork_sub beancounters to cpt_add_ubc(). */
+static void collect_task_ubc(task_t *t, struct cpt_context *ctx)
+{
+	struct task_beancounter *bc = &t->task_bc;
+	cpt_add_ubc(bc->exec_ub, ctx);
+	cpt_add_ubc(bc->task_ub, ctx);
+	cpt_add_ubc(bc->fork_sub, ctx);
+}
+#else
+/* Without CONFIG_USER_RESOURCE there is nothing to collect. */
+static inline void collect_task_ubc(task_t *t, struct cpt_context *ctx)
+{ }
+#endif
+
+/* Record child (reference held by the caller) as a task object; on failure drop that reference, return NULL. */
+static cpt_object_t * remember_task(task_t * child, cpt_object_t * head,
+				    cpt_context_t * ctx)
+{
+	cpt_object_t *entry;
+	if (freezable(child) && !(child->flags & PF_FROZEN)) {
+		eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(child));
+		goto drop;
+	}
+	if (lookup_cpt_object(CPT_OBJ_TASK, child, ctx)) BUG();
+	entry = alloc_cpt_object(GFP_KERNEL, ctx);
+	if (entry == NULL)
+		goto drop;
+	entry->o_count = 1;
+	cpt_obj_setobj(entry, child, ctx);
+	insert_cpt_object(CPT_OBJ_TASK, entry, head, ctx);
+	collect_task_ubc(child, ctx);
+	return entry;
+drop:
+	put_task_struct(child);
+	return NULL;
+}
+
+/* Build the CPT_OBJ_TASK list: VE init first, then every thread and child found from it. */
+static int vps_collect_tasks(struct cpt_context *ctx)
+{
+	int err = -ESRCH;
+	cpt_object_t *obj;
+	task_t *root;
+	read_lock(&tasklist_lock);
+	root = find_task_by_pid_ve(1);
+	if (root)
+		get_task_struct(root);
+	read_unlock(&tasklist_lock);
+	if (!root) {
+		eprintk_ctx("vps_collect_tasks: cannot find root\n");
+		goto out;	/* err is still -ESRCH */
+	}
+
+	if ((obj = alloc_cpt_object(GFP_KERNEL, ctx)) == NULL) {
+		put_task_struct(root);
+		return -ENOMEM;
+	}
+	obj->o_count = 1;
+	cpt_obj_setobj(obj, root, ctx);
+	intern_cpt_object(CPT_OBJ_TASK, obj, ctx);
+	collect_task_ubc(root, ctx);
+
+	/* Collect process subtree recursively */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		cpt_object_t *head = obj;
+		task_t *tsk = obj->o_obj;
+		task_t *child;
+
+		if (freezable(tsk) && !(tsk->flags&PF_FROZEN)) {
+			eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(tsk));
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (tsk->state == TASK_RUNNING)
+			printk("Holy Crap 2 %ld " CPT_FID "\n", tsk->state, CPT_TID(tsk));
+
+		wait_task_inactive(tsk);
+
+		if (tsk->pid == tsk->tgid) {
+			child = tsk;
+			for (;;) {
+				read_lock(&tasklist_lock);
+				child = next_thread(child);
+				if (child != tsk)
+					get_task_struct(child);
+				read_unlock(&tasklist_lock);
+
+				if (child == tsk)
+					break;
+
+				if (child->real_parent != tsk->real_parent) {
+					put_task_struct(child);
+					eprintk_ctx("illegal thread structure, kernel bug\n");
+					/* Unwind through "out" like every other failure below. */
+					err = -EINVAL;
+					goto out;
+				}
+
+				if ((head = remember_task(child, head, ctx)) == NULL) {
+					eprintk_ctx("task obj allocation failure\n");
+					err = -ENOMEM;
+					goto out;
+				}
+			}
+		}
+
+		/* About locking. VE is frozen. But lists of children
+		 * may change at least for init, when entered task reparents
+		 * to init and when reparented task exits. If we take care
+		 * of this case, we still can unlock while scanning
+		 * tasklists.
+		 */
+		read_lock(&tasklist_lock);
+		list_for_each_entry(child, &tsk->children, sibling) {
+			if (child->real_parent != tsk)
+				continue;
+			if (child->pid != child->tgid)
+				continue;
+			get_task_struct(child);
+			read_unlock(&tasklist_lock);
+
+			if ((head = remember_task(child, head, ctx)) == NULL) {
+				eprintk_ctx("task obj allocation failure\n");
+				err = -ENOMEM;
+				goto out;
+			}
+
+			read_lock(&tasklist_lock);
+		}
+
+		list_for_each_entry(child, &tsk->ptrace_children, ptrace_list) {
+			if (child->real_parent != tsk)
+				continue;
+			if (child->pid != child->tgid)
+				continue;
+			get_task_struct(child);
+			read_unlock(&tasklist_lock);
+
+			if ((head = remember_task(child, head, ctx)) == NULL) {
+				eprintk_ctx("task obj allocation failure\n");
+				err = -ENOMEM;
+				goto out;
+			}
+
+			read_lock(&tasklist_lock);
+		}
+		read_unlock(&tasklist_lock);
+	}
+
+	return 0;
+
+out:
+	while (!list_empty(&ctx->object_array[CPT_OBJ_TASK])) {
+		struct list_head *head = ctx->object_array[CPT_OBJ_TASK].next;
+		cpt_object_t *victim = list_entry(head, cpt_object_t, o_list);
+		task_t *tsk;
+
+		list_del(head);
+		tsk = victim->o_obj;
+		put_task_struct(tsk);
+		free_cpt_object(victim, ctx);
+	}
+	return err;
+}
+
+/*
+ * Run the collection passes in order: mm, sysv, files, fs, namespace,
+ * signals, then the VIRTINFO_SCP_COLLECT notifier chain.  The first failing
+ * pass ends the sequence and its error is returned; a notifier answering
+ * NOTIFY_FAIL yields -ECHRNG.
+ */
+static int cpt_collect(struct cpt_context *ctx)
+{
+	int err;
+
+	err = cpt_collect_mm(ctx);
+	if (!err)
+		err = cpt_collect_sysv(ctx);
+	if (!err)
+		err = cpt_collect_files(ctx);
+	if (!err)
+		err = cpt_collect_fs(ctx);
+	if (!err)
+		err = cpt_collect_namespace(ctx);
+	if (!err)
+		err = cpt_collect_signals(ctx);
+	if (err)
+		return err;
+	if (virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_COLLECT, ctx) & NOTIFY_FAIL)
+		return -ECHRNG;
+	return 0;
+}
+
+static int cpt_dump_veinfo(cpt_context_t *ctx)
+{
+	struct cpt_veinfo_image *i = cpt_get_buf(ctx);	/* image is assembled in the scratch page */
+	struct ve_struct *ve;
+	struct timespec delta;
+	/* Write one CPT_OBJ_VEINFO object: the VE's IPC limits, start-time deltas and last_pid. */
+	cpt_open_section(ctx, CPT_SECT_VEINFO);
+	cpt_open_object(NULL, ctx);
+
+	memset(i, 0, sizeof(*i));
+
+	i->cpt_next = CPT_NULL;
+	i->cpt_object = CPT_OBJ_VEINFO;
+	i->cpt_hdrlen = sizeof(*i);
+	i->cpt_content = CPT_CONTENT_VOID;
+
+	ve = get_exec_env();
+	i->shm_ctl_all = ve->_shm_ctlall;
+	i->shm_ctl_max = ve->_shm_ctlmax;
+	i->shm_ctl_mni = ve->_shm_ctlmni;
+
+	i->msg_ctl_max = ve->_msg_ctlmax;
+	i->msg_ctl_mni = ve->_msg_ctlmni;
+	i->msg_ctl_mnb = ve->_msg_ctlmnb;
+
+	BUILD_BUG_ON(sizeof(ve->_sem_ctls) != sizeof(i->sem_ctl_arr));
+	i->sem_ctl_arr[0] = ve->_sem_ctls[0];
+	i->sem_ctl_arr[1] = ve->_sem_ctls[1];
+	i->sem_ctl_arr[2] = ve->_sem_ctls[2];
+	i->sem_ctl_arr[3] = ve->_sem_ctls[3];
+	/* Time elapsed since the VE started: monotonic-clock difference and jiffies difference. */
+	do_posix_clock_monotonic_gettime(&delta);
+	_set_normalized_timespec(&delta,
+			delta.tv_sec - ve->start_timespec.tv_sec,
+			delta.tv_nsec - ve->start_timespec.tv_nsec);
+	i->start_timespec_delta = cpt_timespec_export(&delta);
+	i->start_jiffies_delta = get_jiffies_64() - ve->start_jiffies;
+
+	i->last_pid = last_pid;
+
+	ctx->write(i, sizeof(*i), ctx);
+	cpt_release_buf(ctx);	/* pairs with cpt_get_buf() at the top */
+	cpt_close_object(ctx);
+	cpt_close_section(ctx);
+	return 0;	/* always 0; write failures are only recorded in ctx->write_error */
+}
+
+/*
+ * Emit one NUL-terminated string as a CPT_OBJ_NAME object: object header,
+ * the string with its terminator, then whatever padding ctx->align() adds.
+ */
+static void cpt_dump_uts_string(const char *str, cpt_context_t *ctx)
+{
+	struct cpt_object_hdr hdr;
+
+	cpt_open_object(NULL, ctx);
+	hdr.cpt_next = CPT_NULL;
+	hdr.cpt_object = CPT_OBJ_NAME;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_content = CPT_CONTENT_NAME;
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	ctx->write(str, strlen(str) + 1, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+}
+
+/*
+ * Dump the VE's host identity into the CPT_SECT_UTSNAME section: first
+ * ve_utsname.nodename, then ve_utsname.domainname, one object each.
+ * Always returns 0.
+ */
+static int cpt_dump_utsname(cpt_context_t *ctx)
+{
+	cpt_open_section(ctx, CPT_SECT_UTSNAME);
+
+	cpt_dump_uts_string(ve_utsname.nodename, ctx);
+	cpt_dump_uts_string(ve_utsname.domainname, ctx);
+
+	cpt_close_section(ctx);
+	return 0;
+}
+
+int cpt_dump(struct cpt_context *ctx)
+{
+	struct ve_struct *oldenv, *env;
+	int err, err2 = 0;
+	/* Write the checkpoint image of VE ctx->ve_id; returns 0 or -errno. */
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (!env)
+		return -ESRCH;
+	/* Refuse with -ESRCH unless the VE is running and marked is_locked. */
+	down_read(&env->op_sem);
+	err = -ESRCH;
+	if (!env->is_running)
+		goto out_noenv;
+	if (!env->is_locked)
+		goto out_noenv;
+
+	oldenv = set_exec_env(env);
+
+	/* Phase 2: real checkpointing */
+	err = cpt_open_dumpfile(ctx);
+	if (err)
+		goto out;
+
+	cpt_major_hdr_out(ctx);
+	/* Steps run in this fixed order; after a failure the rest are skipped. */
+	if (!err)
+		err = cpt_dump_veinfo(ctx);
+	if (!err)
+		err = cpt_dump_ubc(ctx);
+	if (!err)
+		err = cpt_dump_ifinfo(ctx);
+	if (!err)
+		err = cpt_dump_files(ctx);
+	if (!err)
+		err = cpt_dump_files_struct(ctx);
+	if (!err)
+		err = cpt_dump_fs_struct(ctx);
+	if (!err)
+		err = cpt_dump_namespace(ctx);
+	if (!err)
+		err = cpt_dump_sighand(ctx);
+	if (!err)
+		err = cpt_dump_vm(ctx);
+	if (!err)
+		err = cpt_dump_sysvsem(ctx);
+	if (!err)
+		err = cpt_dump_tasks(ctx);
+	if (!err)
+		err = cpt_dump_orphaned_sockets(ctx);
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+	if (!err)
+		err = cpt_dump_ip_conntrack(ctx);
+#endif
+	if (!err) {
+		if (virtinfo_notifier_call(VITYPE_SCP,
+					VIRTINFO_SCP_DUMP, ctx) & NOTIFY_FAIL)
+			err = -ECHRNG;
+	}
+	if (!err)
+		err = cpt_dump_utsname(ctx);
+
+	if (!err)
+		err = cpt_dump_tail(ctx);
+
+	err2 = cpt_close_dumpfile(ctx);
+	/* Unwind in reverse order; a dump error takes precedence over err2. */
+out:
+	set_exec_env(oldenv);
+out_noenv:
+	up_read(&env->op_sem);
+	put_ve(env);
+	return err ? : err2;
+}
+
+int cpt_vps_suspend(struct cpt_context *ctx)
+{
+	struct ve_struct *oldenv, *env;
+	int err = 0;
+	/* Lock VE ctx->ve_id, stop its tasks and collect their state. */
+	ctx->kernel_config_flags = test_kernel_config();
+	cpt_object_init(ctx);
+	/* Without an explicit ve_id use the caller's own VE, but never VE0. */
+	if (!ctx->ve_id) {
+		env = get_exec_env();
+		if (env == get_ve0())
+			return -EINVAL;
+		wprintk("undefined ve_id\n");
+		ctx->ve_id = env->veid;
+		get_ve(env);
+	} else {
+		env = get_ve_by_id(ctx->ve_id);
+		if (!env)
+			return -ESRCH;
+	}
+
+#ifdef CONFIG_VE_IPTABLES
+	ctx->iptables_mask = env->_iptables_modules;
+#endif
+	ctx->features = env->features;
+	/* Test-and-set is_locked under the write lock, then downgrade it. */
+	down_write(&env->op_sem);
+	err = -ESRCH;
+	if (!env->is_running)
+		goto out_noenv;
+
+	err = -EBUSY;
+	if (env->is_locked)
+		goto out_noenv;
+	env->is_locked = 1;
+	downgrade_write(&env->op_sem);
+
+	oldenv = set_exec_env(env);
+
+	/* Phase 0: find and stop all the tasks */
+	if ((err = vps_stop_tasks(ctx)) != 0)
+		goto out;
+
+	if ((err = cpt_suspend_network(ctx)) != 0)
+		goto out_wake;
+
+	/* At the moment all the state is frozen. We do not need to lock
+	 * the state, which can be changed only if the tasks are running.
+	 */
+
+	/* Phase 1: collect task tree */
+	if ((err = vps_collect_tasks(ctx)) != 0)
+		goto out_wake;
+
+	/* Phase 1': collect all the resources */
+	if ((err = cpt_collect(ctx)) != 0)
+		goto out;
+	/* NOTE(review): is_locked is left set when err != 0 here — confirm. */
+out:
+	set_exec_env(oldenv);
+	up_read(&env->op_sem);
+	put_ve(env);
+        return err;
+	/* Failed before the downgrade: op_sem is still held for write. */
+out_noenv:
+	up_write(&env->op_sem);
+	put_ve(env);
+	return err;
+	/* Failed after the tasks were stopped: wake_ve() before leaving. */
+out_wake:
+	read_lock(&tasklist_lock);
+	wake_ve(ctx);
+	read_unlock(&tasklist_lock);
+	goto out;
+}
+
+static void check_unsupported_netdevices(struct cpt_context *ctx, __u32 *caps)
+{
+	struct net_device *dev;
+	/* Flag CPT_UNSUPPORTED_NETDEV for any device not exempted below. */
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev; dev = dev->next) {
+		if (dev != get_exec_env()->_loopback_dev
+#if defined(CONFIG_VE_ETHDEV) || defined(CONFIG_VE_ETHDEV_MODULE)
+		    && !(KSYMREF(veth_open) && dev->open == KSYMREF(veth_open))
+#endif
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+		    && dev != get_exec_env()->_venet_dev
+#endif
+							) {
+			eprintk_ctx("unsupported netdevice %s\n", dev->name);
+			*caps |= (1<<CPT_UNSUPPORTED_NETDEV);
+		}
+	}
+	read_unlock(&dev_base_lock);
+}
+
+/* OR into *caps the CPU needs and checkpoint blockers found on task p. */
+static void check_one_process(struct cpt_context *ctx, __u32 *caps,
+		unsigned int flags, struct ve_struct *env,
+		task_t *root, task_t *p)
+{
+	if (tsk_used_math(p))
+		*caps |= flags;
+#ifdef CONFIG_X86_64
+	if (!(p->thread_info->flags & _TIF_IA32))
+		*caps |= (1<<CPT_CPU_X86_EMT64);
+#endif
+#ifdef CONFIG_IA64
+	if (!IS_IA32_PROCESS(task_pt_regs(p)))
+		*caps |= (1<<CPT_CPU_X86_IA64);
+#endif
+	if (vps_child_level(root, p) < 0) {
+		eprintk_ctx("foreign process %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
+		*caps |= (1<<CPT_EXTERNAL_PROCESS);
+	} else {
+		if (!is_virtual_pid(virt_pid(p))) {
+			eprintk_ctx("external process %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
+			*caps |= (1<<CPT_EXTERNAL_PROCESS);
+		}
+		if (!is_virtual_pid(virt_pgid(p))) {
+			eprintk_ctx("external process group %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n", virt_pgid(p), p->pid, p->comm);
+			*caps |= (1<<CPT_EXTERNAL_PROCESS);
+		}
+		if (!is_virtual_pid(virt_sid(p))) {
+			eprintk_ctx("external process session %d/%d(%s) inside VE (e.g. vzctl enter or vzctl exec).\n", virt_sid(p), p->pid, p->comm);
+			*caps |= (1<<CPT_EXTERNAL_PROCESS);
+		}
+	}
+	if (p->namespace && p->namespace != current->namespace) {
+		eprintk_ctx("namespaces are not supported: process %d/%d(%s)\n", virt_pid(p), p->pid, p->comm);
+		*caps |= (1<<CPT_NAMESPACES);
+	}
+	if (p->policy != SCHED_NORMAL) {
+		eprintk_ctx("scheduler policy is not supported %d/%d(%s)\n", virt_pid(p), p->pid, p->comm);
+		*caps |= (1<<CPT_SCHEDULER_POLICY);
+	}
+#if 0
+	if (check_utrace(p, root, ctx)) {
+		eprintk_ctx("task %d/%d(%s) is ptraced from VE0\n", p->pid, virt_pid(p), p->comm);
+		*caps |= (1<<CPT_PTRACED_FROM_VE0);
+	}
+#endif
+	if (cpt_check_unsupported(p, ctx))
+		*caps |= (1<<CPT_UNSUPPORTED_MISC);
+}
+
+/* Set CPT_UNSUPPORTED_FSTYPE in *caps if check_one_vfsmount() returns
+ * nonzero for any mount of namespace n that __d_path() can resolve.
+ */
+static void check_unsupported_mounts(struct cpt_context *ctx, __u32 *caps,
+		struct ve_struct *env, struct namespace *n, char *path_buf)
+{
+	struct vfsmount *mnt;
+
+	down_read(&namespace_sem);
+	list_for_each_entry(mnt, &n->list, mnt_list) {
+		/* Mounts whose path cannot be resolved are skipped. */
+		if (IS_ERR(__d_path(mnt->mnt_root, mnt,
+				    env->fs_root, env->fs_rootmnt,
+				    path_buf, PAGE_SIZE)))
+			continue;
+
+		if (!check_one_vfsmount(mnt))
+			continue;
+		eprintk_ctx("Unsupported filesystem %s\n", mnt->mnt_sb->s_type->name);
+		*caps |= (1<<CPT_UNSUPPORTED_FSTYPE);
+	}
+	up_read(&namespace_sem);
+}
+
+int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps)
+{
+	task_t *p;
+	task_t *root;
+	struct ve_struct *env;
+	struct ve_struct *old_env;
+	struct namespace *n;
+	int err;
+	unsigned int flags = test_cpu_caps();
+	/* Fill *caps with the CPT_* bits that apply to VE ctx->ve_id. */
+	if (!ctx->ve_id)
+		return -EINVAL;
+
+	env = get_ve_by_id(ctx->ve_id);
+	if (env == NULL)
+		return -ESRCH;
+	/* Only CMOV is copied up front; EMT64/IA64 are decided per task below. */
+	*caps = flags & (1<<CPT_CPU_X86_CMOV);
+	flags &= ~((1<<CPT_CPU_X86_EMT64)|(1<<CPT_CPU_X86_IA64));
+
+	old_env = set_exec_env(env);
+
+	check_unsupported_netdevices(ctx, caps);
+	/* Check each VE task under tasklist_lock, relative to VE pid 1. */
+	read_lock(&tasklist_lock);
+	root = find_task_by_pid_ve(1);
+	if (!root) {
+		read_unlock(&tasklist_lock);
+		eprintk_ctx("cannot find ve init\n");
+		err = -ESRCH;
+		goto out;
+	}
+	get_task_struct(root);
+	for (p = __first_task_ve(env); p != NULL ; p = __next_task_ve(env, p))
+		check_one_process(ctx, caps, flags, env, root, p);
+	read_unlock(&tasklist_lock);
+	/* Pin root's namespace so it can be walked after task_unlock(). */
+	task_lock(root);
+	n = root->namespace;
+	if (n)
+		get_namespace(n);
+	task_unlock(root);
+
+	if (n) {
+		char *path_buf;
+
+		path_buf = (char *) __get_free_page(GFP_KERNEL);
+		if (!path_buf) {
+			put_namespace(n);
+			err = -ENOMEM;
+			goto out_root;
+		}
+
+		check_unsupported_mounts(ctx, caps, env, n, path_buf);
+
+		free_page((unsigned long) path_buf);
+		put_namespace(n);
+	}
+
+	err = 0;
+	/* Drop the references taken above and restore the caller's exec env. */
+out_root:
+	put_task_struct(root);
+out:
+	set_exec_env(old_env);
+	put_ve(env);
+
+	return err;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_dump.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_dump.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_dump.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_dump.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,16 @@
+int cpt_dump(struct cpt_context *cpt);
+int rst_undump(struct cpt_context *cpt);
+int cpt_suspend(struct cpt_context *cpt);
+int cpt_resume(struct cpt_context *cpt);
+int cpt_kill(struct cpt_context *cpt);
+int rst_clean(struct cpt_context *cpt);
+int rst_resume(struct cpt_context *cpt);
+int rst_kill(struct cpt_context *cpt);
+
+int cpt_freeze_one(pid_t pid, int freeze);
+int cpt_vps_suspend(struct cpt_context *ctx);
+int vps_rst_undump(struct cpt_context *ctx);
+
+int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps);
+
+int cpt_check_unsupported(struct task_struct *tsk, struct cpt_context *ctx);
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_epoll.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_epoll.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_epoll.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_epoll.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,116 @@
+/*
+ *
+ *  kernel/cpt/cpt_epoll.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/eventpoll.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+extern struct file_operations eventpoll_fops;
+
+int cpt_dump_epolldev(cpt_object_t *obj, cpt_context_t *ctx)
+{
+	int err = 0;
+	struct file *file = obj->o_obj;
+	struct eventpoll *ep;
+	struct rb_node *rbp;
+	struct cpt_epoll_image ei;
+	/* Dump epoll file obj: a CPT_OBJ_EPOLL header plus one child per epitem. */
+	if (file->f_op != &eventpoll_fops) {
+		eprintk_ctx("bad epoll file\n");
+		return -EINVAL;
+	}
+
+	ep = file->private_data;
+
+	/* eventpoll.c does not protect open /proc/N/fd, silly.
+	 * Opener will get an invalid file with uninitialized private_data
+	 */
+	if (unlikely(ep == NULL)) {
+		eprintk_ctx("bad epoll device\n");
+		return -EINVAL;
+	}
+
+	cpt_open_object(NULL, ctx);
+	/* NOTE(review): ei and efi are uncleared stack structs — confirm no gaps. */
+	ei.cpt_next = CPT_NULL;
+	ei.cpt_object = CPT_OBJ_EPOLL;
+	ei.cpt_hdrlen = sizeof(ei);
+	ei.cpt_content = CPT_CONTENT_ARRAY;
+	ei.cpt_file = obj->o_pos;
+
+	ctx->write(&ei, sizeof(ei), ctx);
+	/* Walk the epitems under epsem; each watched file must be a known object. */
+	down(&epsem);
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		loff_t saved_obj;
+		cpt_object_t *tobj;
+		struct cpt_epoll_file_image efi;
+		struct epitem *epi;
+		epi = rb_entry(rbp, struct epitem, rbn);
+		tobj = lookup_cpt_object(CPT_OBJ_FILE, epi->ffd.file, ctx);
+		if (tobj == NULL) {
+			eprintk_ctx("epoll device refers to an external file\n");
+			err = -EBUSY;
+			break;
+		}
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+
+		efi.cpt_next = CPT_NULL;
+		efi.cpt_object = CPT_OBJ_EPOLL_FILE;
+		efi.cpt_hdrlen = sizeof(efi);
+		efi.cpt_content = CPT_CONTENT_VOID;
+		efi.cpt_file = tobj->o_pos;
+		efi.cpt_fd = epi->ffd.fd;
+		efi.cpt_events = epi->event.events;
+		efi.cpt_data = epi->event.data;
+		efi.cpt_revents = epi->revents;
+		efi.cpt_ready = 0;
+		if (!list_empty(&epi->rdllink))
+			efi.cpt_ready = 1;
+
+		ctx->write(&efi, sizeof(efi), ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	up(&epsem);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
+
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_exports.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_exports.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_exports.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_exports.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,12 @@
+#include <linux/module.h>
+
+#include "cpt_obj.h"
+
+EXPORT_SYMBOL(alloc_cpt_object);
+EXPORT_SYMBOL(intern_cpt_object);
+EXPORT_SYMBOL(insert_cpt_object);
+EXPORT_SYMBOL(__cpt_object_add);
+EXPORT_SYMBOL(cpt_object_add);
+EXPORT_SYMBOL(cpt_object_get);
+EXPORT_SYMBOL(lookup_cpt_object);
+EXPORT_SYMBOL(lookup_cpt_obj_bypos);
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_files.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_files.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_files.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_files.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,1628 @@
+/*
+ *
+ *  kernel/cpt/cpt_files.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <linux/pagemap.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/ve_proto.h>
+#include <ub/ub_mem.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+/* Log the d_path() name of (d, mnt) as "<path>"; silent on any failure. */
+void cpt_printk_dentry(struct dentry *d, struct vfsmount *mnt)
+{
+	char *name;
+	char *page_buf = (char *)__get_free_page(GFP_KERNEL);
+
+	if (page_buf == NULL)
+		return;
+
+	name = d_path(d, mnt, page_buf, PAGE_SIZE);
+	if (!IS_ERR(name))
+		eprintk("<%s>", name);
+	free_page((unsigned long)page_buf);
+}
+
+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
+			 cpt_context_t *ctx)
+{
+	struct nameidata nd;
+	int same;
+	/* -EINVAL if an absolute, still linked @path no longer leads to (d, mnt). */
+	if (path[0] != '/' || (!IS_ROOT(d) && d_unhashed(d)))
+		return 0;
+	if (path_lookup(path, 0, &nd)) {
+		eprintk_ctx("d_path cannot be looked up %s\n", path);
+		return -EINVAL;
+	}
+	same = (nd.dentry == d && nd.mnt == mnt);
+	if (!same)
+		eprintk_ctx("d_path is invisible %s\n", path);
+	path_release(&nd);
+	return same ? 0 : -EINVAL;
+}
+
+static int
+cpt_replaced(struct dentry * de, struct vfsmount *mnt, cpt_context_t * ctx)
+{
+	int result = 0;
+	/* 1 if de counts as replaced per the vefs_*() checks below; else 0. */
+#if defined(CONFIG_VZFS_FS) || defined(CONFIG_VZFS_FS_MODULE)
+	char *path;
+	unsigned long pg;
+	struct dentry * renamed_dentry;
+	/* Only VEFS inodes with i_nlink == 0 and no writers are candidates. */
+	if (de->d_sb->s_magic != FSMAGIC_VEFS)
+		return 0;
+	if (de->d_inode->i_nlink != 0 ||
+	    atomic_read(&de->d_inode->i_writecount) > 0) 
+		return 0;
+
+	renamed_dentry = vefs_replaced_dentry(de);
+	if (renamed_dentry == NULL)
+		return 0;
+
+	pg = __get_free_page(GFP_KERNEL);
+	if (!pg)
+		return 0;
+
+	path = d_path(de, mnt, (char *)pg, PAGE_SIZE);
+	if (!IS_ERR(path)) {
+		int len;
+		struct nameidata nd;
+		/* Assumes d_path() places the string at the very end of the page. */
+		len = pg + PAGE_SIZE - 1 - (unsigned long)path;
+		if (len >= sizeof("(deleted) ") - 1 &&
+		    !memcmp(path, "(deleted) ", sizeof("(deleted) ") - 1)) {
+			len -= sizeof("(deleted) ") - 1;
+			path += sizeof("(deleted) ") - 1;
+		}
+		/* The stripped name must resolve on the same mount to a match. */
+		if (path_lookup(path, 0, &nd) == 0) {
+			if (mnt == nd.mnt &&
+			    vefs_is_renamed_dentry(nd.dentry, renamed_dentry))
+				result = 1;
+			path_release(&nd);
+		}
+	}
+	free_page(pg);
+#endif
+	return result;
+}
+
+static int cpt_dump_dentry(struct dentry *d, struct vfsmount *mnt,
+			   int replaced, cpt_context_t *ctx)
+{
+	int len;
+	char *path;
+	char *pg = cpt_get_buf(ctx);
+	loff_t saved;
+	/* Write the d_path() name of (d, mnt) as a CPT_OBJ_NAME child object. */
+	path = d_path(d, mnt, pg, PAGE_SIZE);
+	len = PTR_ERR(path);
+
+	if (IS_ERR(path)) {
+		struct cpt_object_hdr o;
+		char tmp[1];
+
+		/* VZ changes d_path() to return EINVAL when the path
+		 * is not supposed to be visible inside the VE.
+		 * This differs from the mainstream kernel: for
+		 * example, d_path() then fails on any kind of shared
+		 * memory. There may be other cases, but this is the
+		 * only known one. So the error is ignored on the VE's
+		 * shmem mount and the dump proceeds with an empty name.
+		 * Any other failure is reported and returned to the
+		 * caller, since the file is invisible inside the VE.
+		 */
+		if (len != -EINVAL ||
+		    mnt != get_exec_env()->shmem_mnt)
+			eprintk_ctx("d_path err=%d\n", len);
+		else
+			len = 0;
+		/* An empty name object is written in either case. */
+		cpt_push_object(&saved, ctx);
+		cpt_open_object(NULL, ctx);
+		o.cpt_next = CPT_NULL;
+		o.cpt_object = CPT_OBJ_NAME;
+		o.cpt_hdrlen = sizeof(o);
+		o.cpt_content = CPT_CONTENT_NAME;
+		tmp[0] = 0;
+
+		ctx->write(&o, sizeof(o), ctx);
+		ctx->write(tmp, 1, ctx);
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved, ctx);
+
+		__cpt_release_buf(ctx);
+		return len;
+	} else {
+		struct cpt_object_hdr o;
+		/* Assumes d_path() places the string at the very end of pg. */
+		len = pg + PAGE_SIZE - 1 - path;
+		if (replaced &&
+		    len >= sizeof("(deleted) ") - 1 &&
+		    !memcmp(path, "(deleted) ", sizeof("(deleted) ") - 1)) {
+			len -= sizeof("(deleted) ") - 1;
+			path += sizeof("(deleted) ") - 1;
+		}
+		o.cpt_next = CPT_NULL;
+		o.cpt_object = CPT_OBJ_NAME;
+		o.cpt_hdrlen = sizeof(o);
+		o.cpt_content = CPT_CONTENT_NAME;
+		path[len] = 0;
+		/* Refuse a name that no longer resolves to this very dentry. */
+		if (cpt_verify_overmount(path, d, mnt, ctx)) {
+			__cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+
+		cpt_push_object(&saved, ctx);
+		cpt_open_object(NULL, ctx);
+		ctx->write(&o, sizeof(o), ctx);
+		ctx->write(path, len+1, ctx);
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved, ctx);
+		__cpt_release_buf(ctx);
+	}
+	return 0;
+}
+
+int cpt_dump_string(const char *s, struct cpt_context *ctx)
+{
+	struct cpt_object_hdr hdr;
+	int size;
+	/* Emit s as a standalone CPT_OBJ_NAME object, NUL included; returns 0. */
+	cpt_open_object(NULL, ctx);
+	size = strlen(s) + 1;
+	hdr.cpt_content = CPT_CONTENT_NAME;
+	hdr.cpt_hdrlen = sizeof(hdr);
+	hdr.cpt_object = CPT_OBJ_NAME;
+	hdr.cpt_next = CPT_NULL;
+
+	ctx->write(&hdr, sizeof(hdr), ctx);
+	ctx->write(s, size, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+
+static int	/* name of file's dentry; thin wrapper around cpt_dump_dentry() */
+cpt_dump_filename(struct file *file, int replaced, cpt_context_t *ctx)
+{
+	return cpt_dump_dentry(file->f_dentry, file->f_vfsmnt, replaced, ctx);
+}
+
+int cpt_dump_inode(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_inode_image *v = cpt_get_buf(ctx);
+	struct kstat sbuf;
+	/* Write one CPT_OBJ_INODE record with the attributes of dentry d. */
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_INODE;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+	/* Nothing is written if the attributes cannot be read. */
+	if ((err = vfs_getattr(mnt, d, &sbuf)) != 0) {
+		cpt_release_buf(ctx);
+		return err;
+	}
+	/* dev, ino, rdev and sb come from the inode, the rest from kstat. */
+	v->cpt_dev	= d->d_inode->i_sb->s_dev;
+	v->cpt_ino	= d->d_inode->i_ino;
+	v->cpt_mode	= sbuf.mode;
+	v->cpt_nlink	= sbuf.nlink;
+	v->cpt_uid	= sbuf.uid;
+	v->cpt_gid	= sbuf.gid;
+	v->cpt_rdev	= d->d_inode->i_rdev;
+	v->cpt_size	= sbuf.size;
+	v->cpt_atime	= cpt_timespec_export(&sbuf.atime);
+	v->cpt_mtime	= cpt_timespec_export(&sbuf.mtime);
+	v->cpt_ctime	= cpt_timespec_export(&sbuf.ctime);
+	v->cpt_blksize	= sbuf.blksize;
+	v->cpt_blocks	= sbuf.blocks;
+	v->cpt_sb	= d->d_inode->i_sb->s_magic;
+	/* NOTE(review): buffer is not cleared first — confirm no field is left unset. */
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+int cpt_collect_files(cpt_context_t * ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	int index = 0;
+	/* Collect files_structs, files, inodes, ttys and sockets of all tasks. */
+	/* Collect process fd sets */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		if (tsk->files && cpt_object_add(CPT_OBJ_FILES, tsk->files, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	/* Collect files from fd sets */
+	for_each_object(obj, CPT_OBJ_FILES) {
+		int fd;
+		struct files_struct *f = obj->o_obj;
+
+		cpt_obj_setindex(obj, index++, ctx);
+		/* o_count must equal the live refcount, else it is used outside. */
+		if (obj->o_count != atomic_read(&f->count)) {
+			eprintk_ctx("files_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&f->count));
+			return -EBUSY;
+		}
+
+		for (fd = 0; fd < f->fdt->max_fds; fd++) {
+			struct file *file = fcheck_files(f, fd);
+			if (file && cpt_object_add(CPT_OBJ_FILE, file, ctx) == NULL)
+				return -ENOMEM;
+		}
+	}
+
+	/* Collect files queued by AF_UNIX sockets. */
+	if ((err = cpt_collect_passedfds(ctx)) < 0)
+		return err;
+
+	/* OK. At this point we should count all the references. */
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		struct file *parent;
+		cpt_object_t *ino_obj;
+
+		if (obj->o_count != atomic_read(&file->f_count)) {
+			eprintk_ctx("file struct is referenced outside %d %d\n", obj->o_count, atomic_read(&file->f_count));
+			cpt_printk_dentry(file->f_dentry, file->f_vfsmnt);
+			return -EBUSY;
+		}
+		/* Files on the filesystems listed here fail the collection. */
+		switch (file->f_dentry->d_inode->i_sb->s_magic) {
+		case FSMAGIC_FUTEX:
+		case FSMAGIC_MQUEUE:
+		case FSMAGIC_BDEV:
+#ifndef CONFIG_INOTIFY_USER
+		case FSMAGIC_INOTIFY:
+#endif
+			eprintk_ctx("file on unsupported FS: magic %08lx\n", file->f_dentry->d_inode->i_sb->s_magic);
+			return -EBUSY;
+		}
+
+		/* Collect inode. It is necessary mostly to resolve deleted
+		 * hard links. */
+		ino_obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+		if (ino_obj == NULL)
+			return -ENOMEM;
+		/* Take over o_parent if it is unset or its dentry is unhashed. */
+		parent = ino_obj->o_parent;
+		if (!parent || (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry)))
+			ino_obj->o_parent = file;
+		/* PTY master/slave and TTYAUX char devices are collected as ttys. */
+		if (S_ISCHR(file->f_dentry->d_inode->i_mode)) {
+			int maj = imajor(file->f_dentry->d_inode);
+			if (maj == PTY_MASTER_MAJOR ||
+			    (maj >= UNIX98_PTY_MASTER_MAJOR &&
+			     maj < UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) ||
+			    maj == PTY_SLAVE_MAJOR ||
+			    maj == UNIX98_PTY_SLAVE_MAJOR ||
+			    maj == TTYAUX_MAJOR) {
+				err = cpt_collect_tty(file, ctx);
+				if (err)
+					return err;
+			}
+		}
+
+		if (S_ISSOCK(file->f_dentry->d_inode->i_mode)) {
+			err = cpt_collect_socket(file, ctx);
+			if (err)
+				return err;
+		}
+	}
+
+	err = cpt_index_sockets(ctx);
+
+	return err;
+}
+
+/* /dev/ptmx is special: all opens share one inode, while the real tty
+ * backend is attached via file->private_data. Such an inode is recognized
+ * here as the character device with major TTYAUX_MAJOR and minor 2.
+ */
+static inline int is_cloning_inode(struct inode *ino)
+{
+	return S_ISCHR(ino->i_mode) &&
+		ino->i_rdev == MKDEV(TTYAUX_MAJOR,2);
+}
+
+static int dump_one_flock(struct file_lock *fl, int owner, struct cpt_context *ctx)
+{
+	pid_t pid;
+	struct cpt_flock_image *v = cpt_get_buf(ctx);
+	/* Write one CPT_OBJ_FLOCK record for fl; owner is stored as cpt_owner. */
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_FLOCK;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	v->cpt_owner = owner;
+	/* Translate non-virtual pids; failure is accepted (as 0) only for FL_FLOCK. */
+	pid = fl->fl_pid;
+	if (pid && !is_virtual_pid(fl->fl_pid)) {
+		pid = _pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
+		if (pid == -1) {
+			if (!(fl->fl_flags&FL_FLOCK)) {
+				eprintk_ctx("posix lock from another VE?\n");
+				cpt_release_buf(ctx);
+				return -EBUSY;
+			}
+			pid = 0;
+		}
+	}
+
+	v->cpt_pid = pid;
+	v->cpt_start = fl->fl_start;
+	v->cpt_end = fl->fl_end;
+	v->cpt_flags = fl->fl_flags;
+	v->cpt_type = fl->fl_type;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+/* Dump every lock @file holds on its inode (owner -1 for flock locks). */
+int cpt_dump_flock(struct file *file, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct file_lock *fl;
+
+	lock_kernel();
+	for (fl = file->f_dentry->d_inode->i_flock;
+	     fl; fl = fl->fl_next) {
+		if (file != fl->fl_file)
+			continue;
+		if (fl->fl_flags & FL_LEASE) {
+			eprintk_ctx("lease lock is not supported\n");
+			err = -EINVAL;
+			break;
+		}
+		if (fl->fl_flags & FL_POSIX) {
+			cpt_object_t *obj;
+			obj = lookup_cpt_object(CPT_OBJ_FILES, fl->fl_owner, ctx);
+			if (obj == NULL) {
+				eprintk_ctx("unknown lock owner %p\n", fl->fl_owner);
+				err = -EINVAL;
+				break;
+			}
+			err = dump_one_flock(fl, obj->o_index, ctx);
+		} else if (fl->fl_flags & FL_FLOCK) {
+			err = dump_one_flock(fl, -1, ctx);
+		}
+		/* A record that failed (e.g. foreign lock owner) aborts the dump. */
+		if (err)
+			break;
+	}
+	unlock_kernel();
+	return err;
+}
+
+static int __comb_pid_to_vpid(int pid)
+{
+	int vpid = pid;
+	/* Map an f_owner value (>0: pid, <0: -pgid) to its VE form; 0 if gone. */
+	if (pid > 0) {
+		vpid = _pid_type_to_vpid(PIDTYPE_PID, pid);
+		if (unlikely(vpid < 0)) {
+			dprintk("pid %d does not exist anymore.\n", pid);
+			return 0;
+		}
+	} else if (pid < 0) {
+		vpid = _pid_type_to_vpid(PIDTYPE_PGID, -pid);
+		if (unlikely(vpid < 0)) {
+			dprintk("pgid %d does not exist anymore.\n", -pid);
+			return 0;
+		}
+		vpid = -vpid;
+	}
+	return vpid;
+}
+
+static int dump_one_file(cpt_object_t *obj, struct file *file, cpt_context_t *ctx)
+{
+	int err;
+	cpt_object_t *iobj;
+	struct cpt_file_image *v;
+	struct kstat sbuf;
+	int replaced = 0;
+	/* Write the CPT_OBJ_FILE record of @file, then its name and its locks.
+	 * Returns 0 or -errno. The file is stat'ed first: its mode steers the
+	 * branches below and must not come from an unfilled kstat. */
+	err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &sbuf);
+	if (err)
+		return err;
+	v = cpt_get_buf(ctx);
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FILE;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_flags = file->f_flags;
+	v->cpt_mode = file->f_mode;
+	v->cpt_pos = file->f_pos;
+	v->cpt_uid = file->f_uid;
+	v->cpt_gid = file->f_gid;
+	v->cpt_i_mode = sbuf.mode;
+	v->cpt_lflags = 0;
+	if (IS_ROOT(file->f_dentry))
+		v->cpt_lflags |= CPT_DENTRY_ROOT;
+	else if (d_unhashed(file->f_dentry)) {
+		if (cpt_replaced(file->f_dentry, file->f_vfsmnt, ctx)) {
+			v->cpt_lflags |= CPT_DENTRY_REPLACED;
+			replaced = 1;
+		} else {
+			v->cpt_lflags |= CPT_DENTRY_DELETED;
+		}
+	}
+	if (is_cloning_inode(file->f_dentry->d_inode))
+		v->cpt_lflags |= CPT_DENTRY_CLONING;
+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_PROC)
+		v->cpt_lflags |= CPT_DENTRY_PROC;
+	v->cpt_inode = CPT_NULL;
+	if (!(v->cpt_lflags & CPT_DENTRY_REPLACED)) {
+		iobj = lookup_cpt_object(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+		if (iobj)
+			v->cpt_inode = iobj->o_pos;
+	}
+	v->cpt_priv = CPT_NULL;
+	v->cpt_fown_fd = -1;
+	if (S_ISCHR(v->cpt_i_mode)) {
+		iobj = lookup_cpt_object(CPT_OBJ_TTY, file->private_data, ctx);
+		if (iobj) {
+			v->cpt_priv = iobj->o_pos;
+			if (file->f_flags&FASYNC)
+				v->cpt_fown_fd = cpt_tty_fasync(file, ctx);
+		}
+	}
+	if (S_ISSOCK(v->cpt_i_mode)) {
+		if (obj->o_index < 0) {
+			eprintk_ctx("BUG: no socket index\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_priv = obj->o_index;
+		if (file->f_flags&FASYNC)
+			v->cpt_fown_fd = cpt_socket_fasync(file, ctx);
+	}
+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) {
+		v->cpt_priv = file->f_dentry->d_inode->i_ino;
+		v->cpt_lflags |= CPT_DENTRY_EPOLL;
+	}
+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_INOTIFY) {
+		v->cpt_priv = file->f_dentry->d_inode->i_ino;
+		v->cpt_lflags |= CPT_DENTRY_INOTIFY;
+	}
+	v->cpt_fown_pid = __comb_pid_to_vpid((int)file->f_owner.pid);
+	v->cpt_fown_uid = file->f_owner.uid;
+	v->cpt_fown_euid = file->f_owner.euid;
+	v->cpt_fown_signo = file->f_owner.signum;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	/* v is released: from here on use sbuf.mode, not the buffer. */
+	if (!S_ISSOCK(sbuf.mode)) {
+		err = cpt_dump_filename(file, replaced, ctx);
+		if (err)
+			return err;
+		if ((file->f_mode & FMODE_WRITE) &&
+				file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_VEFS)
+			vefs_track_notify(file->f_dentry, 1);
+	}
+
+	if (file->f_dentry->d_inode->i_flock)
+		err = cpt_dump_flock(file, ctx);
+	cpt_close_object(ctx);
+	return err;
+}
+
+/* About this weird function... Crappy code dealing with SYSV shared memory
+ * defines TMPFS inode and file with f_op doing only mmap. So...
+ * Maybe, this is wrong and leaks something. It is clear access to
+ * SYSV shmem via mmap is quite unusual and impossible from user space.
+ */
+static int dump_content_shm(struct file *file, struct cpt_context *ctx)
+{
+	struct cpt_obj_bits *v;
+	loff_t saved_pos;
+	unsigned long addr;
+	/* Dump the file as one CPT_OBJ_BITS object through a temporary mmap. */
+	addr = do_mmap_pgoff(file, 0, file->f_dentry->d_inode->i_size,
+			     PROT_READ, MAP_SHARED, 0);
+	if (IS_ERR((void*)addr))
+		return PTR_ERR((void*)addr);
+	/* NOTE(review): mmap_sem is not taken around do_mmap_pgoff/do_munmap. */
+	cpt_push_object(&saved_pos, ctx);
+	cpt_open_object(NULL, ctx);
+	v = cpt_get_buf(ctx);
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_BITS;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_DATA;
+	v->cpt_size = file->f_dentry->d_inode->i_size;
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	ctx->write((void*)addr, file->f_dentry->d_inode->i_size, ctx);
+	ctx->align(ctx);
+	do_munmap(current->mm, addr, file->f_dentry->d_inode->i_size);
+
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_pos, ctx);
+	return 0;
+}
+
+/* Return 1 when the first len bytes at addr are all zero, otherwise 0. */
+static int data_is_zero(char *addr, int len)
+{
+	unsigned long *words = (unsigned long *)addr;
+	unsigned long zerolong = 0;
+	int tail = len % sizeof(unsigned long);
+	int i;
+	/* Whole words first; the cast relies on addr being suitably aligned. */
+	for (i = 0; i < len/sizeof(unsigned long); i++)
+		if (words[i] != 0)
+			return 0;
+	/* Then the trailing bytes that do not fill a whole word, if any. */
+	return !tail || memcmp(addr + len - tail, &zerolong, tail) == 0;
+}
+
+/* Dump the data of @file as CPT_OBJ_PAGES blocks, leaving out zero chunks. */
+static int dump_content_regular(struct file *file, struct cpt_context *ctx)
+{
+	loff_t saved_pos;
+	loff_t pos = 0;
+	loff_t obj_opened = CPT_NULL;
+	struct cpt_page_block pgb;
+	ssize_t (*do_read)(struct file *, char __user *, size_t, loff_t *);
+
+	if (file->f_op == NULL)
+		return -EINVAL;
+
+	if ((do_read = file->f_op->read) == NULL) {
+		if (file->f_op->mmap == NULL)
+			return -EINVAL;
+		if (file->f_dentry->d_inode->i_sb->s_magic != FSMAGIC_TMPFS) {
+			eprintk_ctx("unreadable, but not SYSV SHM file\n");
+			return -EINVAL;
+		}
+
+		do_read = file->f_dentry->d_inode->i_fop->read;
+		cpt_dump_content_sysvshm(file, ctx);
+		if (!do_read) {
+			wprintk_ctx("TMPFS is not configured?\n");
+			return dump_content_shm(file, ctx);
+		}
+	}
+	/* Read via a private O_RDONLY open if @file is write-only or O_DIRECT. */
+	if (!(file->f_mode & FMODE_READ) ||
+	    (file->f_flags & O_DIRECT)) {
+		struct file *rfile = dentry_open(dget(file->f_dentry),
+						 mntget(file->f_vfsmnt), O_RDONLY);
+		if (IS_ERR(rfile)) {
+			/* @file must stay valid here: it is still dereferenced. */
+			cpt_printk_dentry(file->f_dentry, file->f_vfsmnt);
+			eprintk_ctx("cannot reopen file for read %ld\n", PTR_ERR(rfile));
+			return PTR_ERR(rfile);
+		}
+		file = rfile;
+	} else {
+		atomic_inc(&file->f_count);
+	}
+	/* Copy PAGE_SIZE chunks; each run of non-zero chunks forms one block. */
+	for (;;) {
+		mm_segment_t oldfs;
+		int err;
+
+		(void)cpt_get_buf(ctx);
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = do_read(file, ctx->tmpbuf, PAGE_SIZE, &pos);
+		set_fs(oldfs);
+		if (err < 0) {
+			eprintk_ctx("dump_content_regular: do_read: %d\n", err);
+			fput(file);
+			__cpt_release_buf(ctx);
+			return err;
+		}
+		if (err == 0) {
+			__cpt_release_buf(ctx);
+			break;
+		}
+		if (data_is_zero(ctx->tmpbuf, err)) {
+			if (obj_opened != CPT_NULL) {
+				ctx->pwrite(&pgb.cpt_end, 8, ctx, obj_opened + offsetof(struct cpt_page_block, cpt_end));
+				ctx->align(ctx);
+				cpt_close_object(ctx);
+				cpt_pop_object(&saved_pos, ctx);
+				obj_opened = CPT_NULL;
+			}
+		} else {
+			if (obj_opened == CPT_NULL) {
+				cpt_push_object(&saved_pos, ctx);
+				cpt_open_object(NULL, ctx);
+				obj_opened = ctx->file->f_pos;
+				pgb.cpt_next = CPT_NULL;
+				pgb.cpt_object = CPT_OBJ_PAGES;
+				pgb.cpt_hdrlen = sizeof(pgb);
+				pgb.cpt_content = CPT_CONTENT_DATA;
+				pgb.cpt_start = pos - err;
+				pgb.cpt_end = pgb.cpt_start;
+				ctx->write(&pgb, sizeof(pgb), ctx);
+			}
+			ctx->write(ctx->tmpbuf, err, ctx);
+			pgb.cpt_end += err;
+		}
+		__cpt_release_buf(ctx);
+	}
+	fput(file);
+	/* Data ended inside a block: patch its cpt_end and close it. */
+	if (obj_opened != CPT_NULL) {
+		ctx->pwrite(&pgb.cpt_end, 8, ctx, obj_opened + offsetof(struct cpt_page_block, cpt_end));
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_pos, ctx);
+		obj_opened = CPT_NULL;
+	}
+	return 0;
+}
+
+
+static int dump_content_chrdev(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	int maj = imajor(ino);
+	int is_tty;
+	/* MEM_MAJOR needs no content; ttys have a dumper; anything else fails. */
+	if (maj == MEM_MAJOR)
+		return 0;
+
+	is_tty = maj == PTY_MASTER_MAJOR ||
+		 (maj >= UNIX98_PTY_MASTER_MAJOR &&
+		  maj < UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) ||
+		 maj == PTY_SLAVE_MAJOR ||
+		 maj == UNIX98_PTY_SLAVE_MAJOR ||
+		 maj == TTYAUX_MAJOR;
+	if (is_tty)
+		return cpt_dump_content_tty(file, ctx);
+
+	eprintk_ctx("unsupported chrdev %d/%d\n", maj, iminor(ino));
+	return -EINVAL;
+}
+/* We are not going to transfer block devices: log major/minor, return -EINVAL. */
+static int dump_content_blkdev(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *bdev_ino = file->f_dentry->d_inode;
+	int dev_major = imajor(bdev_ino);
+	int dev_minor = iminor(bdev_ino);
+	eprintk_ctx("unsupported blkdev %d/%d\n", dev_major, dev_minor);
+	return -EINVAL;
+}
+/* Save unread pipe/FIFO data as a CPT_OBJ_BITS object unless outside users exist. */
+static int dump_content_fifo(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	cpt_object_t *obj;
+	loff_t saved_pos;
+	int readers, writers;
+	int anon = 0;
+
+	mutex_lock(PIPE_MUTEX(*ino));
+	readers = PIPE_READERS(*ino);
+	writers = PIPE_WRITERS(*ino);
+	for_each_object(obj, CPT_OBJ_FILE) {	/* discount every collected file on this inode */
+		struct file *file1 = obj->o_obj;
+		if (file1->f_dentry->d_inode == ino) {
+			if (file1->f_mode & FMODE_READ)
+				readers--;
+			if (file1->f_mode & FMODE_WRITE)
+				writers--;
+		}
+	}
+	mutex_unlock(PIPE_MUTEX(*ino));
+	if (readers || writers) {
+		struct dentry *dr = file->f_dentry->d_sb->s_root;
+		if (dr->d_name.len == 7 && memcmp(dr->d_name.name,"pipefs:",7) == 0)
+			anon = 1;
+
+		if (anon) {
+			eprintk_ctx("pipe has %d/%d external readers/writers\n", readers, writers);
+			return -EBUSY;
+		}
+		/* If fifo has external readers/writers, we are in troubles.
+		 * If the buffer is not empty, we must move its content.
+		 * But if the fifo is owned by a service, we cannot do
+		 * this. See?
+		 *
+		 * For now we assume, that if fifo is opened by another
+		 * process, we do not own it and, hence, migrate without
+		 * data.
+		 */
+		return 0;
+	}
+
+	/* OK, we must save fifo state. No semaphores required. */
+
+	if (ino->i_pipe->nrbufs) {
+		struct cpt_obj_bits *v = cpt_get_buf(ctx);
+		struct pipe_inode_info *info;
+		int count, buf, nrbufs;
+
+		mutex_lock(PIPE_MUTEX(*ino));
+		info =  ino->i_pipe;
+		count = 0;
+		buf = info->curbuf;
+		nrbufs = info->nrbufs;
+		while (--nrbufs >= 0) {	/* pass 1: check each buffer, total the bytes */
+			if (!info->bufs[buf].ops->can_merge) {
+				mutex_unlock(PIPE_MUTEX(*ino));
+				eprintk_ctx("unknown format of pipe buffer\n");
+				cpt_release_buf(ctx);	/* do not leave the buffer held */
+				return -EINVAL;
+			}
+			count += info->bufs[buf].len;
+			buf = (buf+1) & (PIPE_BUFFERS-1);
+		}
+
+		if (!count) {
+			mutex_unlock(PIPE_MUTEX(*ino));
+			cpt_release_buf(ctx);	/* nothing to write: give the buffer back */
+			return 0;
+		}
+
+		cpt_push_object(&saved_pos, ctx);
+		cpt_open_object(NULL, ctx);
+		v->cpt_next = CPT_NULL;
+		v->cpt_object = CPT_OBJ_BITS;
+		v->cpt_hdrlen = sizeof(*v);
+		v->cpt_content = CPT_CONTENT_DATA;
+		v->cpt_size = count;
+		ctx->write(v, sizeof(*v), ctx);
+		cpt_release_buf(ctx);
+
+		buf = info->curbuf;
+		nrbufs = info->nrbufs;
+		while (--nrbufs >= 0) {	/* pass 2: append each buffer's payload */
+			struct pipe_buffer *b = info->bufs + buf;
+			void * addr = b->ops->map(file, info, b);
+			ctx->write(addr + b->offset, b->len, ctx);
+			b->ops->unmap(info, b);
+			buf = (buf+1) & (PIPE_BUFFERS-1);
+		}
+
+		mutex_unlock(PIPE_MUTEX(*ino));
+
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_pos, ctx);
+	}
+
+	return 0;
+}
+/* Stub: nothing is written for socket inodes here; always returns 0. */
+static int dump_content_socket(struct file *file, struct cpt_context *ctx)
+{
+	return 0;
+}
+/* Search state shared between find_linked_dentry() and cpt_filldir(). */
+struct cpt_dirent {
+	unsigned long	ino;	/* inode number being searched for */
+	char		*name;	/* buffer receiving the matching entry's name */
+	int		namelen;	/* length of the stored name, without the NUL */
+	int		found;	/* set to 1 once a match was stored */
+};
+/* vfs_readdir() callback: store the first entry that has the wanted inode number. */
+static int cpt_filldir(void * __buf, const char * name, int namelen,
+		loff_t offset, ino_t ino, unsigned int d_type)
+{
+	struct cpt_dirent *want = __buf;
+
+	/* Ignore other inodes and names failing the PAGE_SIZE - 1 length check. */
+	if (ino != want->ino || namelen >= PAGE_SIZE - 1)
+		return 0;
+	memcpy(want->name, name, namelen);
+	want->name[namelen] = '\0';
+	want->namelen = namelen;
+	want->found = 1;
+	return 1;
+}
+/* Find a linked name for ino (its alias list, then d's parent directory) and dump it. */
+static int find_linked_dentry(struct dentry *d, struct vfsmount *mnt,
+		struct inode *ino, struct cpt_context *ctx)
+{
+	int err = -EBUSY;
+	struct file *f = NULL;
+	struct cpt_dirent entry;
+	struct dentry *de, *found = NULL;
+
+	dprintk_ctx("deleted reference to existing inode, try to find file\n");
+	/* 1. Try to find not deleted dentry in ino->i_dentry list */
+	spin_lock(&dcache_lock);
+	list_for_each_entry(de, &ino->i_dentry, d_alias) {
+		if (!IS_ROOT(de) && d_unhashed(de))
+			continue;
+		found = de;
+		dget_locked(found);
+		break;
+	}
+	spin_unlock(&dcache_lock);
+	if (found) {
+		err = cpt_dump_dentry(found, mnt, 0, ctx);
+		dput(found);
+		if (!err) {
+			dprintk_ctx("dentry found in aliases\n");
+			return 0;
+		}
+	}
+
+	/* 2. Try to find file in current dir */
+	de = dget_parent(d);
+	if (!de)
+		return -EINVAL;
+
+	mntget(mnt);
+	f = dentry_open(de, mnt, O_RDONLY);
+	if (IS_ERR(f))
+		return PTR_ERR(f);	/* NOTE(review): assumes dentry_open() dropped the de/mnt references — confirm */
+
+	entry.ino = ino->i_ino;
+	entry.name = cpt_get_buf(ctx);	/* cpt_filldir() stores the matching name here */
+	entry.found = 0;
+	err = vfs_readdir(f, cpt_filldir, &entry);
+	if (err || !entry.found) {
+		err = err ? err : -ENOENT;
+		goto err_readdir;
+	}
+
+	found = lookup_one_len(entry.name, de, entry.namelen);
+	if (IS_ERR(found)) {
+		err = PTR_ERR(found);
+		goto err_readdir;
+	}
+
+	err = -ENOENT;
+	if (found->d_inode != ino)	/* the name resolves to some other inode */
+		goto err_lookup;
+
+	dprintk_ctx("dentry found in dir\n");
+	__cpt_release_buf(ctx);	/* NOTE(review): err_readdir releases again below — confirm a second release is harmless */
+	err = cpt_dump_dentry(found, mnt, 0, ctx);
+
+err_lookup:
+	dput(found);
+err_readdir:
+	fput(f);
+	__cpt_release_buf(ctx);
+	return err;
+}
+/* Emit an inode object for d when it is unlinked or not a regular file/directory. */
+static int dump_one_inode(struct file *file, struct dentry *d,
+			  struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct inode *ino = d->d_inode;
+	cpt_object_t *iobj;
+	int dump_it = 0;
+
+	iobj = lookup_cpt_object(CPT_OBJ_INODE, ino, ctx);
+	if (!iobj)
+		return -EINVAL;
+
+	if (iobj->o_pos >= 0)	/* presumably: this inode was already written — TODO confirm o_pos semantics */
+		return 0;
+
+	if ((!IS_ROOT(d) && d_unhashed(d)) &&
+	    !cpt_replaced(d, mnt, ctx))
+		dump_it = 1;
+	if (!S_ISREG(ino->i_mode) && !S_ISDIR(ino->i_mode)) {
+		/* One more bug in epoll: invalid inode mode.
+		 * What a load of crap...
+		 */
+		if (ino->i_sb->s_magic == FSMAGIC_EPOLL &&
+		    (ino->i_mode & S_IFMT) == 0)
+			return 0;
+		dump_it = 1;
+	}
+
+	if (!dump_it)
+		return 0;
+
+	cpt_open_object(iobj, ctx);
+	cpt_dump_inode(d, mnt, ctx);
+
+	if (!IS_ROOT(d) && d_unhashed(d)) {
+		struct file *parent;
+		parent = iobj->o_parent;
+		if (!parent ||
+		    (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry))) {
+			/* Inode is not deleted, but it does not
+			 * have references from inside checkpointed
+			 * process group. */
+			if (ino->i_nlink != 0) {
+				err = find_linked_dentry(d, mnt, ino, ctx);
+				if (err) {
+					eprintk_ctx("deleted reference to existing inode, checkpointing is impossible: %d\n", err);
+					return -EBUSY;	/* note: returns without reaching cpt_close_object() below */
+				}
+				if (S_ISREG(ino->i_mode) || S_ISDIR(ino->i_mode))
+					dump_it = 0;
+			}
+		} else {
+			/* Refer to _another_ file name. */
+			err = cpt_dump_filename(parent, 0, ctx);
+			if (err)
+				return err;	/* note: returns without reaching cpt_close_object() below */
+			if (S_ISREG(ino->i_mode) || S_ISDIR(ino->i_mode))
+				dump_it = 0;
+		}
+	}
+	if (dump_it) {	/* content is dumped by a per-file-type helper */
+		if (S_ISREG(ino->i_mode)) {
+			if ((err = dump_content_regular(file, ctx)) != 0) {
+				eprintk_ctx("dump_content_regular ");
+				cpt_printk_dentry(d, mnt);
+			}
+		} else if (S_ISDIR(ino->i_mode)) {
+			/* We cannot do anything. The directory should be
+			 * empty, so it is not a big deal.
+			 */
+		} else if (S_ISCHR(ino->i_mode)) {
+			err = dump_content_chrdev(file, ctx);
+		} else if (S_ISBLK(ino->i_mode)) {
+			err = dump_content_blkdev(file, ctx);
+		} else if (S_ISFIFO(ino->i_mode)) {
+			err = dump_content_fifo(file, ctx);
+		} else if (S_ISSOCK(ino->i_mode)) {
+			err = dump_content_socket(file, ctx);
+		} else {
+			eprintk_ctx("unknown inode mode %o\n", ino->i_mode & S_IFMT);
+			err = -EINVAL;
+		}
+	}
+	cpt_close_object(ctx);
+
+	return err;
+}
+/* Write the TTY, INODE, FILES, optional EPOLL/INOTIFY, and SOCKET sections in order. */
+int cpt_dump_files(struct cpt_context *ctx)
+{
+	int epoll_nr, inotify_nr;
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_TTY);
+	for_each_object(obj, CPT_OBJ_TTY) {
+		int err;
+
+		if ((err = cpt_dump_tty(obj, ctx)) != 0)
+			return err;	/* as with every error return here, the section stays open */
+	}
+	cpt_close_section(ctx);
+
+	cpt_open_section(ctx, CPT_SECT_INODE);
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		int err;
+
+		if ((err = dump_one_inode(file, file->f_dentry,
+					  file->f_vfsmnt, ctx)) != 0)
+			return err;
+	}
+	for_each_object(obj, CPT_OBJ_FS) {	/* root/pwd/altroot inodes, passed with a NULL file */
+		struct fs_struct *fs = obj->o_obj;
+		int err;
+
+		if (fs->root &&
+		    (err = dump_one_inode(NULL, fs->root, fs->rootmnt, ctx)) != 0)
+			return err;
+		if (fs->pwd &&
+		    (err = dump_one_inode(NULL, fs->pwd, fs->pwdmnt, ctx)) != 0)
+			return err;
+		if (fs->altroot &&
+		    (err = dump_one_inode(NULL, fs->altroot, fs->altrootmnt, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	epoll_nr = 0;
+	inotify_nr = 0;
+	cpt_open_section(ctx, CPT_SECT_FILES);
+	for_each_object(obj, CPT_OBJ_FILE) {	/* dump files and count epoll/inotify ones */
+		struct file *file = obj->o_obj;
+		int err;
+
+		if ((err = dump_one_file(obj, file, ctx)) != 0)
+			return err;
+		if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL)
+			epoll_nr++;
+		if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_INOTIFY)
+			inotify_nr++;
+	}
+	cpt_close_section(ctx);
+
+	if (epoll_nr) {	/* section is written only when at least one epoll file exists */
+		cpt_open_section(ctx, CPT_SECT_EPOLL);
+		for_each_object(obj, CPT_OBJ_FILE) {
+			struct file *file = obj->o_obj;
+			if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) {
+				int err;
+				if ((err = cpt_dump_epolldev(obj, ctx)) != 0)
+					return err;
+			}
+		}
+		cpt_close_section(ctx);
+	}
+
+	if (inotify_nr) {	/* likewise only when at least one inotify file exists */
+		cpt_open_section(ctx, CPT_SECT_INOTIFY);
+		for_each_object(obj, CPT_OBJ_FILE) {
+			struct file *file = obj->o_obj;
+			if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_INOTIFY) {
+				int err = -EINVAL;
+#ifdef CONFIG_INOTIFY_USER
+				if ((err = cpt_dump_inotify(obj, ctx)) != 0)
+#endif
+					return err;	/* without CONFIG_INOTIFY_USER this is unconditional: -EINVAL */
+			}
+		}
+		cpt_close_section(ctx);
+	}
+
+	cpt_open_section(ctx, CPT_SECT_SOCKET);
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		int err;
+
+		if ((err = cpt_dump_socket(obj, obj->o_obj, obj->o_index, -1, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	return 0;
+}
+/* Emit one CPT_OBJ_FILEDESC record linking descriptor number fd to its file object. */
+static int dump_filedesc(int fd, struct file *file,
+			 struct files_struct *f, struct cpt_context *ctx)
+{
+	struct cpt_fd_image *v = cpt_get_buf(ctx);
+	cpt_object_t *file_obj;
+
+	cpt_open_object(NULL, ctx);
+
+	/* A file missing from the object table is a fatal internal error. */
+	file_obj = lookup_cpt_object(CPT_OBJ_FILE, file, ctx);
+	BUG_ON(!file_obj);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FILEDESC;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+	v->cpt_fd = fd;
+	v->cpt_file = file_obj->o_pos;
+	v->cpt_flags = FD_ISSET(fd, f->fdt->close_on_exec) ?
+				CPT_FD_FLAG_CLOSEEXEC : 0;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	cpt_close_object(ctx);
+
+	return 0;
+}
+/* Write one CPT_OBJ_FILES record followed by a nested record per open descriptor. */
+static int dump_one_file_struct(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct files_struct *files = obj->o_obj;
+	struct cpt_files_struct_image *v = cpt_get_buf(ctx);
+	loff_t saved_obj;
+	int fd = 0;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FILES;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+	v->cpt_index = obj->o_index;
+	v->cpt_max_fds = files->fdt->max_fds;
+	v->cpt_next_fd = files->fdt->next_fd;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	/* Descriptor records are written between the push and the pop. */
+	cpt_push_object(&saved_obj, ctx);
+	while (fd < files->fdt->max_fds) {
+		struct file *filp = fcheck_files(files, fd);
+		if (filp != NULL)
+			dump_filedesc(fd, filp, files, ctx);
+		fd++;
+	}
+	cpt_pop_object(&saved_obj, ctx);
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+int cpt_dump_files_struct(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_FILES_STRUCT);
+
+	for_each_object(obj, CPT_OBJ_FILES) {
+		int err;
+
+		if ((err = dump_one_file_struct(obj, ctx)) != 0)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
+/* Register each task's fs_struct plus the pwd, root and altroot inodes it references. */
+int cpt_collect_fs(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *task = obj->o_obj;
+		if (task->fs != NULL) {
+			if (!cpt_object_add(CPT_OBJ_FS, task->fs, ctx))
+				return -ENOMEM;
+			if (task->fs->pwd != NULL &&
+			    !cpt_object_add(CPT_OBJ_INODE, task->fs->pwd->d_inode, ctx))
+				return -ENOMEM;
+			if (task->fs->root != NULL &&
+			    !cpt_object_add(CPT_OBJ_INODE, task->fs->root->d_inode, ctx))
+				return -ENOMEM;
+			if (task->fs->altroot != NULL &&
+			    !cpt_object_add(CPT_OBJ_INODE, task->fs->altroot->d_inode, ctx))
+				return -ENOMEM;
+		}
+	}
+	return 0;
+}
+/* Dump dentry d on mount mnt via dump_one_file(), using a zeroed on-stack struct file. */
+int cpt_dump_dir(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	struct file fake;
+
+	/* Only the three fields assigned below are set after zeroing. */
+	memset(&fake, 0, sizeof(fake));
+	fake.f_mode = FMODE_READ|FMODE_PREAD|FMODE_LSEEK;
+	fake.f_vfsmnt = mnt;
+	fake.f_dentry = d;
+	return dump_one_file(NULL, &fake, ctx);
+}
+/* Write a CPT_OBJ_FS record with the umask, then the root, pwd and altroot dirs. */
+static int dump_one_fs(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct fs_struct *fs = obj->o_obj;
+	struct cpt_fs_struct_image *v = cpt_get_buf(ctx);
+	loff_t saved_obj;
+	int err;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_FS;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+	v->cpt_umask = fs->umask;
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	err = cpt_dump_dir(fs->root, fs->rootmnt, ctx);
+	if (err)
+		goto done;
+	err = cpt_dump_dir(fs->pwd, fs->pwdmnt, ctx);
+	if (err || !fs->altroot)
+		goto done;
+	err = cpt_dump_dir(fs->altroot, fs->altrootmnt, ctx);
+done:
+	/* The pop and the object close run on success and failure alike. */
+	cpt_pop_object(&saved_obj, ctx);
+	cpt_close_object(ctx);
+	return err;
+}
+
+int cpt_dump_fs_struct(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_FS);
+
+	for_each_object(obj, CPT_OBJ_FS) {
+		int err;
+
+		if ((err = dump_one_fs(obj, ctx)) != 0)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
+/* Fail with -EINVAL if a mount in the namespace is rejected by check_one_vfsmount(). */
+static int check_one_namespace(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct namespace *ns = obj->o_obj;
+	struct vfsmount *mnt;
+	char *page, *path;
+	int err = 0;
+
+	page = (char *) __get_free_page(GFP_KERNEL);
+	if (page == NULL)
+		return -ENOMEM;
+
+	down_read(&namespace_sem);
+	list_for_each_entry(mnt, &ns->list, mnt_list) {
+		/* Mounts for which d_path() returns an error are skipped unchecked. */
+		path = d_path(mnt->mnt_root, mnt, page, PAGE_SIZE);
+		if (IS_ERR(path))
+			continue;
+
+		if (!check_one_vfsmount(mnt))
+			continue;
+
+		eprintk_ctx("unsupported fs type %s\n", mnt->mnt_sb->s_type->name);
+		err = -EINVAL;
+		break;
+	}
+	up_read(&namespace_sem);
+
+	free_page((unsigned long) page);
+
+	return err;
+}
+/* Collect each task's namespace object, then check every collected namespace. */
+int cpt_collect_namespace(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	int err;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *task = obj->o_obj;
+		if (task->namespace != NULL && !cpt_object_add(CPT_OBJ_NAMESPACE, task->namespace, ctx))
+			return -ENOMEM;
+	}
+
+	for_each_object(obj, CPT_OBJ_NAMESPACE) {
+		err = check_one_namespace(obj, ctx);
+		if (err != 0)
+			return err;
+	}
+	return 0;
+}
+/* Arguments handed from cpt_dump_tmpfs() to the dumptmpfs() kernel thread. */
+struct args_t
+{
+	int* pfd;	/* pipe descriptor pair; pfd[1] is dup'ed onto descriptor 1 */
+	char* path;	/* passed to tar as its last argument */
+};
+/* Thread body: enter the VE, point fd 1 at the pipe, and exec /bin/tar on args->path. */
+static int dumptmpfs(void *arg)
+{
+	int i;
+	struct args_t *args = arg;
+	int *pfd = args->pfd;
+	int fd0, fd2;
+	char *path = args->path;
+	char *argv[] = { "tar", "-c", "-S", "--numeric-owner", path, NULL };
+
+	i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
+	if (i < 0) {
+		eprintk("cannot enter ve to dump tmpfs\n");
+		module_put(THIS_MODULE);	/* presumably the reference taken in local_kernel_thread() — confirm */
+		return 255 << 8;
+	}
+
+	if (pfd[1] != 1)
+		sc_dup2(pfd[1], 1);	/* tar's stdout becomes the pipe's write end */
+	set_fs(KERNEL_DS);
+	fd0 = sc_open("/dev/null", O_RDONLY, 0);
+	fd2 = sc_open("/dev/null", O_WRONLY, 0);
+	if (fd0 < 0 || fd2 < 0) {
+		eprintk("can not open /dev/null for tar: %d %d\n", fd0, fd2);
+		module_put(THIS_MODULE);
+		return 255 << 8;
+	}
+	if (fd0 != 0)
+		sc_dup2(fd0, 0);
+	if (fd2 != 2)
+		sc_dup2(fd2, 2);
+
+	for (i = 3; i < current->files->fdt->max_fds; i++) {	/* close everything except 0, 1 and 2 */
+		sc_close(i);
+	}
+
+	module_put(THIS_MODULE);
+
+	i = sc_execve("/bin/tar", argv, NULL);
+	eprintk("failed to exec /bin/tar: %d\n", i);	/* reached only if the exec returned */
+	return 255 << 8;
+}
+/* Store a tmpfs tree in the image as the tar stream produced by a dumptmpfs() child. */
+static int cpt_dump_tmpfs(char *path, struct cpt_context *ctx)
+{
+	int err;
+	int pid;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	char buf[16];
+	int n;
+	loff_t saved_obj;
+	struct args_t args;
+	int status;
+	mm_segment_t oldfs;
+	sigset_t ignore, blocked;
+
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	args.pfd = pfd;
+	args.path = path;
+	siginitset(&ignore, CPT_SIG_IGNORE_MASK);	/* sets sig[0] and zeroes any further mask words */
+	sigprocmask(SIG_BLOCK, &ignore, &blocked);
+	err = pid = local_kernel_thread(dumptmpfs, (void*)&args, SIGCHLD, 0);
+	if (err < 0) {
+		eprintk_ctx("tmpfs local_kernel_thread: %d\n", err);
+		goto out;
+	}
+	f = fget(pfd[0]);	/* NOTE(review): result is used below without a NULL check */
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	cpt_push_object(&saved_obj, ctx);
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NAME;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	do {	/* copy the pipe into the image until the read returns <= 0 */
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos);
+		set_fs(oldfs);
+		if (n > 0)
+			ctx->write(buf, n, ctx);
+	} while (n > 0);
+
+	fput(f);
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if ((err = sc_waitx(pid, 0, &status)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+	else if ((status & 0x7f) == 0) {
+		err = (status & 0xff00) >> 8;	/* exit code from bits 8-15 of status */
+		if (err != 0) {
+			eprintk_ctx("tar exited with %d\n", err);
+			err = -EINVAL;
+		}
+	} else {
+		eprintk_ctx("tar terminated\n");
+		err = -EINVAL;
+	}
+	set_fs(oldfs);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+
+	buf[0] = 0;
+	ctx->write(buf, 1, ctx);	/* one zero byte follows the stream */
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	return n ? : err;	/* a failed read (n < 0) takes precedence over the wait/tar result */
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+	return err;
+}
+/* Return 1 if a mount listed before mnt in its namespace shares mnt's superblock. */
+static int loopy_root(struct vfsmount *mnt)
+{
+	struct vfsmount *m;
+
+	/* Walk in list order; reaching mnt itself means nothing earlier matched. */
+	list_for_each_entry(m, &mnt->mnt_namespace->list, mnt_list) {
+		if (m == mnt)
+			break;
+		if (m->mnt_sb == mnt->mnt_sb)
+			return 1;
+	}
+	/* Also the result if mnt is never met on the list ("cannot happen"). */
+	return 0;
+}
+/* Dump a bind mount's root as a dentry seen through an earlier mount of the same sb. */
+static int cpt_dump_bind_mnt(struct vfsmount * mnt, cpt_context_t * ctx)
+{
+	struct list_head *pos;
+	struct vfsmount *prev;
+	int err = -EINVAL;
+
+	/* One special case: mount --bind /a /a */
+	if (mnt->mnt_root == mnt->mnt_mountpoint)
+		return cpt_dump_dentry(mnt->mnt_root, mnt, 0, ctx);
+
+	/* Scan backwards from mnt, stopping at the namespace list head. */
+	list_for_each_prev(pos, &mnt->mnt_list) {
+		if (pos == &mnt->mnt_namespace->list)
+			break;
+
+		prev = list_entry(pos, struct vfsmount, mnt_list);
+		if (prev->mnt_sb == mnt->mnt_sb) {
+			err = cpt_dump_dentry(mnt->mnt_root, prev, 0, ctx);
+			if (!err)
+				break;
+		}
+	}
+
+	/* -EINVAL if no candidate was tried, otherwise the last dump result. */
+	return err;
+}
+/* Write one CPT_OBJ_VFSMOUNT record: flags, device, path, fs type, bind or tmpfs data. */
+static int dump_vfsmount(struct vfsmount *mnt, struct cpt_context *ctx)
+{
+	int err = 0;
+	struct cpt_vfsmount_image v;
+	loff_t saved_obj;
+	char *path_buf, *path;
+
+	path_buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!path_buf)
+		return -ENOMEM;
+
+	path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+	if (IS_ERR(path)) {
+		free_page((unsigned long) path_buf);
+		return PTR_ERR(path) == -EINVAL ? 0 : PTR_ERR(path);	/* -EINVAL: mount is silently skipped */
+	}
+
+	cpt_open_object(NULL, ctx);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_VFSMOUNT;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	v.cpt_mntflags = mnt->mnt_flags;
+	if (slab_ub(mnt) != get_exec_ub()) {
+		v.cpt_mntflags |= CPT_MNT_EXT;
+	} else {
+		if (mnt->mnt_root != mnt->mnt_sb->s_root || loopy_root(mnt))
+			v.cpt_mntflags |= CPT_MNT_BIND;
+	}
+	v.cpt_flags = mnt->mnt_sb->s_flags;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	cpt_dump_string(mnt->mnt_devname ? : "none", ctx);
+	cpt_dump_string(path, ctx);
+	cpt_dump_string(mnt->mnt_sb->s_type->name, ctx);
+
+	if (v.cpt_mntflags & CPT_MNT_BIND)
+		err = cpt_dump_bind_mnt(mnt, ctx);
+	else if (!(v.cpt_mntflags & CPT_MNT_EXT) &&
+		   strcmp(mnt->mnt_sb->s_type->name, "tmpfs") == 0) {
+		mntget(mnt);
+		up_read(&namespace_sem);	/* taken by dump_one_namespace(); dropped while tar runs */
+		err = cpt_dump_tmpfs(path, ctx);
+		down_read(&namespace_sem);
+		if (!err) {
+			if (list_empty(&mnt->mnt_list))	/* presumably: mnt left the list meanwhile — confirm */
+				err = -EBUSY;
+		}
+		mntput(mnt);
+	}
+
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+	if (!err && mnt->mnt_sb->s_magic == FSMAGIC_VEFS)
+		vefs_track_force_stop(mnt->mnt_sb);
+
+	free_page((unsigned long) path_buf);
+
+	return err;
+}
+/* Write a CPT_OBJ_NAMESPACE record followed by one dump_vfsmount() call per mount. */
+static int dump_one_namespace(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct namespace *ns = obj->o_obj;
+	struct cpt_object_hdr v;
+	struct vfsmount *mnt;
+	loff_t saved_obj;
+	int err = 0;
+
+	cpt_open_object(obj, ctx);
+
+	v.cpt_next = -1;
+	v.cpt_object = CPT_OBJ_NAMESPACE;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_ARRAY;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+
+	/* The mount list is walked with namespace_sem taken for reading. */
+	down_read(&namespace_sem);
+	list_for_each_entry(mnt, &ns->list, mnt_list) {
+		err = dump_vfsmount(mnt, ctx);
+		if (err != 0)
+			break;
+	}
+	up_read(&namespace_sem);
+
+	cpt_pop_object(&saved_obj, ctx);
+	cpt_close_object(ctx);
+
+	return err;
+}
+
+int cpt_dump_namespace(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	cpt_open_section(ctx, CPT_SECT_NAMESPACE);
+
+	for_each_object(obj, CPT_OBJ_NAMESPACE) {
+		int err;
+
+		if ((err = dump_one_namespace(obj, ctx)) != 0)
+			return err;
+	}
+
+	cpt_close_section(ctx);
+	return 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_files.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_files.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_files.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_files.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,69 @@
+int cpt_collect_files(cpt_context_t *);
+int cpt_collect_fs(cpt_context_t *);
+int cpt_collect_namespace(cpt_context_t *);
+int cpt_collect_sysvsem_undo(cpt_context_t *);
+int cpt_collect_tty(struct file *, cpt_context_t *);
+int cpt_dump_files(struct cpt_context *ctx);
+int cpt_dump_files_struct(struct cpt_context *ctx);
+int cpt_dump_fs_struct(struct cpt_context *ctx);
+int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx);
+int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx);
+int cpt_dump_tty(cpt_object_t *, struct cpt_context *ctx);
+struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx);
+struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii, unsigned flags, struct cpt_context *ctx);
+__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx);
+
+int rst_posix_locks(struct cpt_context *ctx);
+
+struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx);
+int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_restore_fs(struct cpt_context *ctx);
+
+int cpt_collect_sysv(cpt_context_t *);
+int cpt_dump_sysvsem(struct cpt_context *ctx);
+int rst_sysv_ipc(struct cpt_context *ctx);
+int rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int cpt_dump_namespace(struct cpt_context *ctx);
+int rst_root_namespace(struct cpt_context *ctx);
+
+int rst_stray_files(struct cpt_context *ctx);
+int rst_tty_jobcontrol(struct cpt_context *ctx);
+
+void rst_flush_filejobs(struct cpt_context *);
+int rst_do_filejobs(struct cpt_context *);
+
+int rst_eventpoll(struct cpt_context *);
+struct file *cpt_open_epolldev(struct cpt_file_image *fi,
+			       unsigned flags,
+			       struct cpt_context *ctx);
+int cpt_dump_epolldev(cpt_object_t *obj, struct cpt_context *);
+
+int cpt_dump_dir(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx);
+int cpt_get_dentry(struct dentry **dp, struct vfsmount **mp,
+		   loff_t *pos, struct cpt_context *ctx);
+
+int cpt_dump_inotify(cpt_object_t *obj, cpt_context_t *ctx);
+int rst_inotify(cpt_context_t *ctx);
+struct file *rst_open_inotify(struct cpt_file_image *fi,
+			      unsigned flags,
+			      struct cpt_context *ctx);
+
+
+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
+			 cpt_context_t *ctx);
+/* Non-zero when the fs type name of mnt matches none of the names listed below. */
+#define check_one_vfsmount(mnt) \
+	(strcmp((mnt)->mnt_sb->s_type->name, "rootfs") != 0 && \
+	 strcmp((mnt)->mnt_sb->s_type->name, "vzfs") != 0 && \
+	 strcmp((mnt)->mnt_sb->s_type->name, "ext3") != 0 && \
+	 strcmp((mnt)->mnt_sb->s_type->name, "ext2") != 0 && \
+	 strcmp((mnt)->mnt_sb->s_type->name, "simfs") != 0 && \
+	 strcmp((mnt)->mnt_sb->s_type->name, "unionfs") != 0 && \
+	 strcmp((mnt)->mnt_sb->s_type->name, "tmpfs") != 0 && \
+	 strcmp((mnt)->mnt_sb->s_type->name, "devpts") != 0 && \
+	 strcmp((mnt)->mnt_sb->s_type->name, "proc") != 0 && \
+	 strcmp((mnt)->mnt_sb->s_type->name, "sysfs") != 0)
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_fsmagic.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_fsmagic.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_fsmagic.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_fsmagic.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,17 @@
+/* Collected from kernel sources. */
+
+#define FSMAGIC_TMPFS	0x01021994
+#define FSMAGIC_PIPEFS	0x50495045
+#define FSMAGIC_SOCKFS	0x534F434B
+#define FSMAGIC_PFMFS	0xa0b4d889
+#define FSMAGIC_BDEV	0x62646576
+#define FSMAGIC_EPOLL	0x03111965
+#define FSMAGIC_FUTEX	0x0BAD1DEA
+#define FSMAGIC_INOTIFY	0x2BAD1DEA
+#define FSMAGIC_MQUEUE	0x19800202
+#define FSMAGIC_PROC	0x9fa0
+#define FSMAGIC_DEVPTS	0x1CD1
+#define FSMAGIC_AUTOFS	0x0187
+#define FSMAGIC_EXT2	0xEF53
+#define FSMAGIC_REISER	0x52654973
+#define FSMAGIC_VEFS    0x565a4653
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_inotify.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_inotify.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_inotify.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_inotify.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,145 @@
+/*
+ *
+ *  kernel/cpt/cpt_inotify.c
+ *
+ *  Copyright (C) 2000-2007  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/inotify.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+extern struct file_operations inotify_fops;
+/* Dump one inotify file: a CPT_OBJ_INOTIFY record, then its watches and queued events. */
+int cpt_dump_inotify(cpt_object_t *obj, cpt_context_t *ctx)
+{
+	int err = 0;
+	struct file *file = obj->o_obj;
+	struct inotify_device *dev;
+	struct inotify_watch *watch;
+	struct inotify_kernel_event *kev;
+	struct cpt_inotify_image ii;
+
+	if (file->f_op != &inotify_fops) {
+		eprintk_ctx("bad inotify file\n");
+		return -EINVAL;
+	}
+
+	dev = file->private_data;
+
+	/* inotify_user.c does not protect open /proc/N/fd, silly.
+	 * Opener will get an invalid file with uninitialized private_data
+	 */
+	if (unlikely(dev == NULL)) {
+		eprintk_ctx("bad inotify dev\n");
+		return -EINVAL;
+	}
+
+	cpt_open_object(NULL, ctx);
+
+	ii.cpt_next = CPT_NULL;
+	ii.cpt_object = CPT_OBJ_INOTIFY;
+	ii.cpt_hdrlen = sizeof(ii);
+	ii.cpt_content = CPT_CONTENT_ARRAY;
+	ii.cpt_file = obj->o_pos;
+	ii.cpt_user = dev->user->uid;
+	ii.cpt_max_events = dev->max_events;
+	ii.cpt_last_wd = dev->ih->last_wd;	/* read before dev->ih->mutex is taken below */
+
+	ctx->write(&ii, sizeof(ii), ctx);
+
+	mutex_lock(&dev->ih->mutex);
+	list_for_each_entry(watch, &dev->ih->watches, h_list) {	/* one CPT_OBJ_INOTIFY_WATCH per watch */
+		loff_t saved_obj;
+		loff_t saved_obj2;
+		struct cpt_inotify_wd_image wi;
+
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+
+		wi.cpt_next = CPT_NULL;
+		wi.cpt_object = CPT_OBJ_INOTIFY_WATCH;
+		wi.cpt_hdrlen = sizeof(wi);
+		wi.cpt_content = CPT_CONTENT_ARRAY;
+		wi.cpt_wd = watch->wd;
+		wi.cpt_mask = watch->mask;
+
+		ctx->write(&wi, sizeof(wi), ctx);
+
+		cpt_push_object(&saved_obj2, ctx);
+		err = cpt_dump_dir(watch->dentry, watch->mnt, ctx);
+		cpt_pop_object(&saved_obj2, ctx);
+		if (err)
+			break;	/* note: skips the close/pop of this watch object */
+
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	mutex_unlock(&dev->ih->mutex);
+
+	if (err)
+		return err;	/* note: returns without the final cpt_close_object() */
+
+	mutex_lock(&dev->ev_mutex);
+	list_for_each_entry(kev, &dev->events, list) {	/* one CPT_OBJ_INOTIFY_EVENT per queued event */
+		loff_t saved_obj;
+		struct cpt_inotify_ev_image ei;
+
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+
+		ei.cpt_next = CPT_NULL;
+		ei.cpt_object = CPT_OBJ_INOTIFY_EVENT;
+		ei.cpt_hdrlen = sizeof(ei);
+		ei.cpt_content = CPT_CONTENT_NAME;
+		ei.cpt_wd = kev->event.wd;
+		ei.cpt_mask = kev->event.mask;
+		ei.cpt_cookie = kev->event.cookie;
+		ei.cpt_namelen = kev->name ? strlen(kev->name) : 0;
+
+		ctx->write(&ei, sizeof(ei), ctx);
+
+		if (kev->name) {
+			ctx->write(kev->name, ei.cpt_namelen+1, ctx);	/* +1 writes the terminating NUL too */
+			ctx->align(ctx);
+		}
+
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	mutex_unlock(&dev->ev_mutex);
+
+	cpt_close_object(ctx);
+
+	return err;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_kernel.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_kernel.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_kernel.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_kernel.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,166 @@
+/*
+ *
+ *  kernel/cpt/cpt_kernel.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#define __KERNEL_SYSCALLS__ 1
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#ifdef CONFIG_X86
+#include <asm/cpufeature.h>
+#endif
+#include <linux/cpt_image.h>
+
+#include "cpt_kernel.h"
+#include "cpt_syscalls.h"
+
+int debug_level = 1;
+
+#ifdef CONFIG_X86_32
+
+extern void kernel_thread_helper(void);
+
+/*
+ * Create a kernel thread (i386): build a pt_regs frame and pass it, with pid, to do_fork_pid()
+ */
+int asm_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+
+	regs.ebx = (unsigned long) fn;	/* presumably read back by kernel_thread_helper — confirm in arch code */
+	regs.edx = (unsigned long) arg;
+
+	regs.xds = __USER_DS;
+	regs.xes = __USER_DS;
+	regs.orig_eax = -1;
+	regs.eip = (unsigned long) kernel_thread_helper;	/* address the new thread starts at */
+	regs.xcs = __KERNEL_CS;
+	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
+
+	/* Ok, create the new process.. */
+	return do_fork_pid(flags | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL, pid);
+}
+#endif
+
+#ifdef CONFIG_IA64
+pid_t
+asm_kernel_thread (int (*fn)(void *), void *arg, unsigned long flags, pid_t pid)
+{	/* ia64 variant: builds a switch_stack + pt_regs pair and passes regs.pt to do_fork_pid() */
+	extern void start_kernel_thread (void);
+	unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;	/* read as two words: [0] and [1] below */
+	struct {
+		struct switch_stack sw;
+		struct pt_regs pt;
+	} regs;
+
+	memset(&regs, 0, sizeof(regs));	/* every field not set below stays zero */
+	regs.pt.cr_iip = helper_fptr[0];	/* set entry point (IP) */
+	regs.pt.r1 = helper_fptr[1];		/* set GP */
+	regs.pt.r9 = (unsigned long) fn;	/* 1st argument */
+	regs.pt.r11 = (unsigned long) arg;	/* 2nd argument */
+	/* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read.  */
+	regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+	regs.pt.cr_ifs = 1UL << 63;		/* mark as valid, empty frame */
+	regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
+	regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
+	regs.sw.pr = (1 << 2 /*PRED_KERNEL_STACK*/);
+	return do_fork_pid(flags | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL, pid);
+}
+#endif
+/* Start fn(arg) as a kernel thread; the module reference taken here is put back on failure. */
+int local_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid)
+{
+	pid_t child;
+
+	/* do_fork_pid() hates processes without fs, oopses. */
+	if (!current->fs) {
+		printk("CPT BUG: local_kernel_thread: current->fs==NULL\n");
+		return -EINVAL;
+	}
+	if (!try_module_get(THIS_MODULE))
+		return -EBUSY;
+	child = asm_kernel_thread(fn, arg, flags, pid);
+	if (child < 0)
+		module_put(THIS_MODULE);
+	return child;
+}
+
+#ifdef __i386__
+int __execve(const char *file, char **argv, char **envp)
+{
+	long res;	/* receives eax after the trap */
+	__asm__ volatile ("int $0x80"	/* execve issued through software interrupt 0x80 */
+	: "=a" (res)
+	: "0" (__NR_execve),"b" ((long)(file)),"c" ((long)(argv)),	/* eax = call number, ebx = file, ecx = argv */
+		  "d" ((long)(envp)) : "memory");	/* edx = envp */
+	return (int)res;
+}
+#endif
+/* Exec cmd: via the local __execve() stub on i386, via execve() on other builds. */
+int sc_execve(char *cmd, char **argv, char **env)
+{
+#ifdef __i386__
+	int rc = __execve(cmd, argv, env);
+#else
+	int rc = execve(cmd, argv, env);
+#endif
+
+	return rc;
+}
+/* Build a bitmask with one (1 << CPT_CPU_X86_*) bit per feature detected below. */
+unsigned int test_cpu_caps(void)
+{
+	unsigned int flags = 0;
+
+#ifdef CONFIG_X86
+	if (boot_cpu_has(X86_FEATURE_CMOV))
+		flags |= 1 << CPT_CPU_X86_CMOV;
+	if (cpu_has_fxsr)
+		flags |= 1 << CPT_CPU_X86_FXSR;
+	if (cpu_has_xmm)
+		flags |= 1 << CPT_CPU_X86_SSE;
+#ifndef CONFIG_X86_64
+	if (cpu_has_xmm2)
+#endif
+		flags |= 1 << CPT_CPU_X86_SSE2;	/* unconditional on x86_64: the if above is compiled out */
+	if (cpu_has_mmx)
+		flags |= 1 << CPT_CPU_X86_MMX;
+	if (boot_cpu_has(X86_FEATURE_3DNOW))
+		flags |= 1 << CPT_CPU_X86_3DNOW;
+	if (boot_cpu_has(X86_FEATURE_3DNOWEXT))
+		flags |= 1 << CPT_CPU_X86_3DNOW2;
+	if (boot_cpu_has(X86_FEATURE_SEP))
+		flags |= 1 << CPT_CPU_X86_SEP;
+#ifdef CONFIG_X86_64
+	flags |= 1 << CPT_CPU_X86_EMT64;
+#endif
+#endif
+#ifdef CONFIG_IA64
+	flags |= 1 << CPT_CPU_X86_IA64;	/* both ia64 bits are set without any runtime test */
+	flags |= 1 << CPT_CPU_X86_FXSR;
+#endif
+	return flags;
+}
+/* Build the bitmask of CPT_KERNEL_CONFIG_* bits that apply to this kernel build. */
+unsigned int test_kernel_config(void)
+{
+	unsigned int flags = 0;
+
+	/* PAE is reported for X86 builds with CONFIG_X86_PAE or CONFIG_X86_64. */
+#if defined(CONFIG_X86) && (defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64))
+	flags |= 1 << CPT_KERNEL_CONFIG_PAE;
+#endif
+	return flags;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_kernel.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_kernel.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_kernel.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_kernel.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,98 @@
+/* Interface to kernel vars which we had to _add_. */
+
+#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)	/* priority -> nice value, judging by the name */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)	/* fallbacks for kernels older than 2.6.9 */
+#define TASK_TRACED TASK_STOPPED
+#define unix_peer(sk) ((sk)->sk_pair)
+#define page_mapcount(pg) ((pg)->mapcount)
+#else
+#define unix_peer(sk) (unix_sk(sk)->peer)
+#endif
+
+#ifdef CONFIG_X86_64
+#define cpu_has_fxsr 1	/* constant 1 on x86-64, so test_cpu_caps() always sets the FXSR bit */
+#endif
+#ifdef CONFIG_IA64
+#define cpu_has_fxsr 1	/* constant 1 on IA64 as well */
+#endif
+
+#define CPT_SIG_IGNORE_MASK (\
+        (1 << (SIGCONT - 1)) | (1 << (SIGCHLD - 1)) | \
+	(1 << (SIGWINCH - 1)) | (1 << (SIGURG - 1)))	/* bit (signo - 1) for each of the four signals */
+
+static inline void do_gettimespec(struct timespec *ts)
+{
+	struct timeval now;	/* filled by do_gettimeofday(): seconds + microseconds */
+	do_gettimeofday(&now);
+	ts->tv_nsec = now.tv_usec*1000;	/* usec -> nsec */
+	ts->tv_sec = now.tv_sec;
+}
+
+int local_kernel_thread(int (*fn)(void *),
+		void * arg,
+		unsigned long flags,
+		pid_t pid);
+int asm_kernel_thread(int (*fn)(void *),
+		void * arg,
+		unsigned long flags,
+		pid_t pid);
+
+#if defined(CONFIG_VZFS_FS) || defined(CONFIG_VZFS_FS_MODULE)
+void vefs_track_force_stop(struct super_block *super);
+
+void vefs_track_notify(struct dentry *vdentry, int track_cow);
+
+struct dentry * vefs_replaced_dentry(struct dentry *de);
+int vefs_is_renamed_dentry(struct dentry *vde, struct dentry *pde);
+#else
+/* VZFS is not configured: the tracking hooks compile away to nothing. */
+static inline void vefs_track_force_stop(struct super_block *super) { }
+static inline void vefs_track_notify(struct dentry *vdentry, int track_cow) { }
+#endif
+
+unsigned int test_cpu_caps(void);
+unsigned int test_kernel_config(void);
+
+#define test_one_flag_old(src, dst, flag, message, ret) \
+if (src & (1 << flag)) \
+	if (!(dst & (1 << flag))) { \
+		wprintk("Destination cpu does not have " message "\n"); \
+		ret = 1; \
+	}
+#define test_one_flag(src, dst, flag, message, ret) \
+if (src & (1 << flag)) \
+	if (!(dst & (1 << flag))) { \
+		eprintk_ctx("Destination cpu does not have " message "\n"); \
+		ret = 1; \
+	}
+
+/* Store sec/nsec into *ts, first folding nsec into the range
+ * [0, NSEC_PER_SEC) by moving whole seconds into sec. */
+static inline void
+_set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
+{
+	/* One second is carried per iteration, in either direction. */
+	for (; nsec >= NSEC_PER_SEC; nsec -= NSEC_PER_SEC)
+		sec++;
+	for (; nsec < 0; nsec += NSEC_PER_SEC)
+		sec--;
+
+	ts->tv_nsec = nsec;
+	ts->tv_sec = sec;
+}
+
+static inline struct timespec
+_ns_to_timespec(const nsec_t nsec)
+{
+	struct timespec ts;
+
+	if (!nsec)	/* zero: skip the division */
+		return (struct timespec) {0, 0};
+
+	ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec);	/* presumably quotient -> tv_sec, remainder -> tv_nsec; helper is not visible here */
+	if (unlikely(nsec < 0))	/* negative input: renormalize so that tv_nsec ends up in [0, NSEC_PER_SEC) */
+		_set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec);
+
+	return ts;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_mm.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_mm.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_mm.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_mm.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,907 @@
+/*
+ *
+ *  kernel/cpt/cpt_mm.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/errno.h>
+#include <linux/ve.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#ifdef CONFIG_X86
+#include <asm/ldt.h>
+#endif
+#include <asm/mmu.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+#include "cpt_pagein.h"
+#endif
+#include "cpt_ubc.h"
+
+static int collect_one_aio_ctx(struct mm_struct *mm, struct kioctx *aio_ctx,
+			       cpt_context_t *ctx)
+{
+	if (!list_empty(&aio_ctx->run_list)) {	/* refuse up front when the run list is populated */
+		/* This is impossible at least with kernel 2.6.8.1 or 2.6.16 */
+		eprintk_ctx("run list is not empty, cannot suspend AIO\n");
+		return -EBUSY;
+	}
+
+	/* Wait for pending IOCBs. Linux AIO is mostly _fake_.
+	 * It is actually synchronous, except for direct IO and
+	 * some funny raw USB things, which cannot happen inside VE.
+	 * However, we do this for future.
+	 *
+	 * Later note: in 2.6.16 we may allow O_DIRECT, so that
+	 * it is not meaningless code.
+	 */
+	wait_for_all_aios(aio_ctx);
+
+	if (!list_empty(&aio_ctx->run_list) ||	/* after the wait, both lists must be empty ... */
+	    !list_empty(&aio_ctx->active_reqs) ||
+	    aio_ctx->reqs_active) {	/* ... and the active request count must be zero */
+		eprintk_ctx("were not able to suspend AIO\n");
+		return -EBUSY;
+	}
+
+	return 0;	/* the context is idle */
+}
+
+/* Add @mm's mapped files and mm_ub to @ctx and check its AIO contexts. */
+static int collect_one_mm(struct mm_struct *mm, cpt_context_t * ctx)
+{
+	struct vm_area_struct *area;
+	struct kioctx *ioctx;
+	int rc;
+
+	for (area = mm->mmap; area != NULL; area = area->vm_next) {
+		if (area->vm_file == NULL)
+			continue;
+		if (!cpt_object_add(CPT_OBJ_FILE, area->vm_file, ctx))
+			return -ENOMEM;
+	}
+#ifdef CONFIG_USER_RESOURCE
+	if (!cpt_add_ubc(mm->mm_ub, ctx))
+		return -ENOMEM;
+#endif
+
+	for (ioctx = mm->ioctx_list; ioctx != NULL; ioctx = ioctx->next) {
+		rc = collect_one_aio_ctx(mm, ioctx, ctx);
+		if (rc != 0)
+			return rc;
+	}
+
+	return 0;
+}
+
+int cpt_collect_mm(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	int err;
+	int index;
+
+	for_each_object(obj, CPT_OBJ_TASK) {	/* pass 1: register the mm of every collected task that has one */
+		task_t *tsk = obj->o_obj;
+		if (tsk->mm && cpt_object_add(CPT_OBJ_MM, tsk->mm, ctx) == NULL)
+			return -ENOMEM;
+	}
+
+	index = 1;	/* mm objects are numbered starting from 1 */
+	for_each_object(obj, CPT_OBJ_MM) {	/* pass 2: validate, number and scan every mm */
+		struct mm_struct *mm = obj->o_obj;
+		if (obj->o_count != atomic_read(&mm->mm_users)) {	/* object count and mm_users disagree: fail with -EAGAIN */
+			eprintk_ctx("mm_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&mm->mm_users));
+			return -EAGAIN;
+		}
+		cpt_obj_setindex(obj, index++, ctx);
+
+		if ((err = collect_one_mm(mm, ctx)) != 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int zcnt, scnt, scnt0, ucnt;	/* page statistics: ZERO_PAGE hits, shared anon pages without a dumped source, cloned anon pages, unsupported pages */
+
+/* Function where_is_anon_page() returns the address of an anonymous page in
+ * the mm of an already dumped process. This happens e.g. after fork(). We do
+ * not use this right now, we just keep statistics: it is difficult to restore
+ * such state, but the most direct use is to save space in the dumped image. */
+
+
+static inline unsigned long
+vma_address0(struct page *page, struct vm_area_struct *vma)
+{
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);	/* page-cache index converted to PAGE_SIZE units */
+	unsigned long address;
+
+	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);	/* address the page would have inside this vma */
+	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+		address |= 1;	/* outside the vma: set bit 0, presumably so it never equals a page-aligned address */
+	return address;
+}
+
+static int really_this_one(struct vm_area_struct *vma, unsigned long address,
+			   struct page *page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	spinlock_t *ptl;
+	int result;	/* 1 iff the pte at @address maps @page */
+
+	pgd = pgd_offset(mm, address);	/* walk pgd -> pud -> pmd; a missing level means "not mapped here" */
+	if (unlikely(!pgd_present(*pgd)))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (unlikely(!pmd_present(*pmd)))
+		return 0;
+
+	result = 0;
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte)) {	/* unlocked peek: bail out without taking the pte lock */
+		pte_unmap(pte);
+		return 0;
+	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte))	/* re-check under the lock, then compare frame numbers */
+		result = 1;
+	pte_unmap_unlock(pte, ptl);
+	return result;
+}
+
+static loff_t where_is_anon_page(cpt_object_t *mmobj, unsigned long mapaddr,
+				 struct page *page, cpt_context_t * ctx)
+{
+	loff_t mmptr = CPT_NULL;	/* result: o_pos of the chosen mm object, CPT_NULL when none qualifies */
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+	int idx = mmobj->o_index;	/* a candidate must have a smaller index than the best found so far */
+
+	if (!PageAnon(page))
+		return CPT_NULL;
+
+	anon_vma = page_lock_anon_vma(page);	/* paired with the spin_unlock() after the loop */
+	if (!anon_vma)
+		return CPT_NULL;
+
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		unsigned long addr = vma_address0(page, vma);
+		cpt_object_t *obj;
+
+		/* We do not try to support mremapped regions (addr != mapaddr),
+		 * only mmaps directly inherited via fork().
+		 * With this limitation we may check self-consistency of
+		 * vmas (vm_start, vm_pgoff, anon_vma) before
+		 * doing __copy_page_range() in rst_mm.
+		 */
+		if (mmobj->o_obj != vma->vm_mm && addr == mapaddr) {	/* another mm mapping the page at the same address */
+			obj = lookup_cpt_object(CPT_OBJ_MM, vma->vm_mm, ctx);
+			if (obj && obj->o_pos != CPT_NULL && obj->o_index < idx) {	/* a collected mm with an image position and a lower index */
+				if (really_this_one(vma, addr, page)) {
+					mmptr = obj->o_pos;
+					idx = obj->o_index;	/* keep looking for an even lower index */
+				}
+			}
+		}
+	}
+	spin_unlock(&anon_vma->lock);
+
+	return mmptr;
+}
+
+struct page_area	/* a run of consecutive pages of one type, built up by dump_one_vma() */
+{
+	int type;	/* PD_* type shared by all pages of the run */
+	unsigned long start;	/* first address of the run */
+	unsigned long end;	/* address just past the run; start == end means empty */
+	pgoff_t pgoff;	/* pd.index of the page appended last */
+	loff_t mm;	/* pd.mm of the page appended last (source mm for PD_CLONE runs) */
+	__u64 list[16];	/* per-page indices, filled for PD_ITER/PD_ITERYOUNG runs */
+};
+
+struct page_desc	/* what page_get_desc() found out about a single page */
+{
+	int	type;	/* one of the PD_* values */
+	pgoff_t	index;	/* linear page index in the vma by default; some paths overwrite it */
+	loff_t	mm;	/* result of where_is_anon_page() for shared pages */
+	int	shared;	/* 1 when the page has a mapping and page_mapcount() > 1 */
+};
+
+enum {	/* page types reported by page_get_desc() */
+	PD_ABSENT,	/* nothing has to be copied for this page */
+	PD_COPY,	/* page content is written into the image */
+	PD_ZERO,	/* never produced by page_get_desc(); dump_one_vma() treats it like PD_COPY */
+	PD_CLONE,	/* shared anon page whose source mm was found in the image */
+	PD_FUNKEY,	/* unsupported page; dump_one_vma() returns -EINVAL for it */
+	PD_LAZY,	/* page not present (e.g. in swap), or present but not young */
+	PD_ITER,	/* PG_checkpointed page, pte not young */
+	PD_ITERYOUNG,	/* PG_checkpointed page, pte young */
+};
+
+/* Classify the page mapped at @addr in @vma and fill in @pdesc:
+ *   PD_ABSENT - nothing to copy: not mapped, ZERO_PAGE, or the page can
+ *               be obtained from backing store (pdesc->index = its offset);
+ *   PD_COPY   - page content has to be copied into the image;
+ *   PD_LAZY   - page is in swap, or is present but not young;
+ *   PD_CLONE  - anon page shared with an mm that is already dumped,
+ *               pdesc->mm is the image position of that mm;
+ *   PD_ITER*  - PG_checkpointed page (CONFIG_VZ_CHECKPOINT_ITER only);
+ *   PD_FUNKEY - unsupported page, dump_one_vma() fails on it.
+ */
+static void page_get_desc(cpt_object_t *mmobj,
+			  struct vm_area_struct *vma, unsigned long addr,
+			  struct page_desc *pdesc, cpt_context_t * ctx)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+	struct page *pg = NULL;
+	pgoff_t linear_index = (addr - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff;
+
+	pdesc->index = linear_index;
+	pdesc->shared = 0;
+	pdesc->mm = CPT_NULL;	/* dump_one_vma() copies it whatever the type is */
+
+	if (vma->vm_flags & VM_IO) {
+		pdesc->type = PD_ABSENT;
+		return;
+	}
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		goto out_absent;
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+		goto out_absent;
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		goto out_absent;
+#ifdef CONFIG_X86
+	if (pmd_huge(*pmd)) {
+		eprintk_ctx("page_huge\n");
+		goto out_unsupported;
+	}
+#endif
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+retry:
+#endif
+	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	pte = *ptep;
+	pte_unmap(ptep);
+
+	if (pte_none(pte))
+		goto out_absent_unlock;
+
+	if (!pte_present(pte)) {
+		if (pte_file(pte)) {
+			pdesc->index = pte_to_pgoff(pte);
+			goto out_absent_unlock;
+		}
+		if (vma->vm_flags & VM_SHARED) {
+			/* It is impossible: shared mappings cannot be in swap */
+			eprintk_ctx("shared mapping is not present: %08lx@%Ld\n", addr, mmobj->o_pos);
+			goto out_unsupported_unlock;
+		}
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+		/* Otherwise it is in swap. */
+		if (!ctx->lazy_vm) {
+			int err;
+			/* If lazy transfer is not enabled,
+			 * raise it from swap now, so that we
+			 * save at least when the page is shared.
+			 */
+			spin_unlock(ptl);
+			err = handle_mm_fault(mm, vma, addr, 0);
+			if (err == VM_FAULT_SIGBUS)
+				goto out_absent;
+			if (err == VM_FAULT_OOM)
+				goto out_absent;
+			goto retry;
+		}
+#endif
+		pdesc->type = PD_LAZY;
+		goto out_unlock;
+	}
+
+	if ((pg = vm_normal_page(vma, addr, pte)) == NULL) {
+		pdesc->type = PD_COPY;
+		goto out_unlock;
+	}
+
+	get_page(pg);
+	spin_unlock(ptl);
+
+	if (pg->mapping && !PageAnon(pg)) {
+		if (vma->vm_file == NULL) {
+			eprintk_ctx("pg->mapping!=NULL for fileless vma: %08lx\n", addr);
+			goto out_unsupported;
+		}
+		if (vma->vm_file->f_mapping != pg->mapping) {
+			eprintk_ctx("pg->mapping!=f_mapping: %08lx %p %p %Ld\n",
+				    addr, vma->vm_file->f_mapping, pg->mapping,
+				    mmobj->o_pos);
+			goto out_unsupported;
+		}
+		pdesc->index = (pg->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
+		/* Page is in backstore. For us it is like
+		 * it is not present.
+		 */
+		goto out_absent;
+	}
+
+	if (PageReserved(pg)) {
+		/* Special case: ZERO_PAGE is used, when an
+		 * anonymous page is accessed but not written. */
+		if (pg == ZERO_PAGE(addr)) {
+			if (pte_write(pte)) {
+				eprintk_ctx("not funny already, writable ZERO_PAGE\n");
+				goto out_unsupported;
+			}
+			zcnt++;
+			goto out_absent;
+		}
+		eprintk_ctx("reserved page %lu at %08lx@%Ld\n", pg->index,
+			    addr, mmobj->o_pos);
+		goto out_unsupported;
+	}
+
+	if (pg == ZERO_PAGE(addr)) {
+		wprintk_ctx("that's how it works now\n");
+	}
+
+	if (!pg->mapping) {
+		eprintk_ctx("page without mapping at %08lx@%Ld\n", addr,
+			    mmobj->o_pos);
+		goto out_unsupported;
+	}
+
+	if (pg->mapping && page_mapcount(pg) > 1) {
+		pdesc->shared = 1;
+		pdesc->mm = where_is_anon_page(mmobj, addr, pg, ctx);
+		if (pdesc->mm != CPT_NULL) {
+			scnt0++;
+			pdesc->type = PD_CLONE;
+			goto out_put;
+		} else {
+			scnt++;
+		}
+	}
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+	if (ctx->iter_done &&
+	    test_bit(PG_checkpointed, &pg->flags)) {
+		if (pte_write(pte)) {
+			wprintk_ctx("writable PG_checkpointed page\n");
+		}
+		pdesc->index = page_to_pfn(pg);
+		pdesc->type = pte_young(pte) ? PD_ITERYOUNG : PD_ITER;
+		goto out_put;
+	}
+#endif
+	pdesc->type = pte_young(pte) ? PD_COPY : PD_LAZY;
+
+out_put:
+	if (pg)
+		put_page(pg);
+	return;
+
+out_unlock:
+	spin_unlock(ptl);
+	goto out_put;
+
+out_absent_unlock:
+	spin_unlock(ptl);
+out_absent:
+	pdesc->type = PD_ABSENT;
+	goto out_put;
+
+out_unsupported_unlock:
+	spin_unlock(ptl);
+out_unsupported:
+	ucnt++;
+	pdesc->type = PD_FUNKEY;
+	goto out_put;
+}
+
+/* Write the pages of [start, end) in @vma to the image, pinning them with
+ * get_user_pages() in batches of MAX_PAGE_BATCH; a failure is only logged.
+ * ATTN: we give "current" to get_user_pages(). This is wrong, but it only
+ * stores some page fault stats there.
+ * BUG: some archs (e.g. sparc64, but not Intel*) require flushing cache
+ * pages before accessing the vma. */
+void dump_pages(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, struct cpt_context *ctx)
+{
+#define MAX_PAGE_BATCH 16
+	struct page *pg[MAX_PAGE_BATCH];
+	int npages = (end - start)/PAGE_SIZE;
+	int count = 0;	/* pages written so far */
+
+	while (count < npages) {
+		int copy = npages - count;	/* size of this batch */
+		int n;
+
+		if (copy > MAX_PAGE_BATCH)
+			copy = MAX_PAGE_BATCH;
+		n = get_user_pages(current, vma->vm_mm, start, copy,
+				   0, 1, pg, NULL);
+		if (n == copy) {
+			int i;
+			for (i=0; i<n; i++) {
+				char *maddr = kmap(pg[i]);
+				ctx->write(maddr, PAGE_SIZE, ctx);
+				kunmap(pg[i]);
+			}
+		} else {
+			/* Short or failed pin: drop what we got and give up. */
+			eprintk_ctx("get_user_pages fault\n");
+			for ( ; n > 0; n--)
+				page_cache_release(pg[n-1]);
+			return;
+		}
+		start += n*PAGE_SIZE;
+		count += n;
+		for ( ; n > 0; n--)
+			page_cache_release(pg[n-1]);
+	}
+}
+
+int dump_page_block(struct vm_area_struct *vma, struct cpt_page_block *pgb,
+		    int copy,
+		    struct cpt_context *ctx)
+{
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb->cpt_object = (copy != PD_LAZY) ? CPT_OBJ_PAGES : CPT_OBJ_LAZYPAGES;	/* @copy is a PD_* page type */
+	pgb->cpt_hdrlen = sizeof(*pgb);
+	pgb->cpt_content = (copy == PD_COPY || copy == PD_LAZY) ? CPT_CONTENT_DATA : CPT_CONTENT_VOID;	/* only these two types carry page data */
+
+	ctx->write(pgb, sizeof(*pgb), ctx);	/* header first; caller has filled cpt_start/cpt_end */
+	if (copy == PD_COPY || copy == PD_LAZY)
+		dump_pages(vma, pgb->cpt_start, pgb->cpt_end, ctx);	/* then the page contents */
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;	/* always 0: dump_pages() reports nothing back */
+}
+
+int dump_remappage_block(struct vm_area_struct *vma, struct page_area *pa,
+			 struct cpt_context *ctx)
+{
+	struct cpt_remappage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_REMAPPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;	/* header only, no payload follows */
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	pgb.cpt_pgoff = pa->pgoff - (pa->end-pa->start)/PAGE_SIZE + 1;	/* pa->pgoff is the last page's offset; step back to the first page */
+
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_copypage_block(struct vm_area_struct *vma, struct page_area *pa,
+			struct cpt_context *ctx)
+{
+	struct cpt_copypage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_COPYPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;	/* header only, no payload follows */
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	pgb.cpt_source = pa->mm;	/* image position of the mm the pages are shared with */
+
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_lazypage_block(struct vm_area_struct *vma, struct page_area *pa,
+			cpt_context_t *ctx)
+{
+	struct cpt_lazypage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = CPT_OBJ_LAZYPAGES;
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;	/* header only, no payload follows */
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY	/* NOTE(review): without this option cpt_index is written uninitialized */
+	pgb.cpt_index = cpt_alloc_pgin_index(vma, pa->start,
+				     (pa->end-pa->start)/PAGE_SIZE, ctx);
+#endif
+	ctx->write(&pgb, sizeof(pgb), ctx);
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+int dump_iterpage_block(struct vm_area_struct *vma, struct page_area *pa,
+			cpt_context_t *ctx)
+{
+	struct cpt_iterpage_block pgb;
+	loff_t saved_object;
+
+	cpt_push_object(&saved_object, ctx);
+
+	pgb.cpt_object = pa->type == PD_ITER ? CPT_OBJ_ITERPAGES :
+		CPT_OBJ_ITERYOUNGPAGES;	/* any type other than PD_ITER is written as "young" */
+	pgb.cpt_hdrlen = sizeof(pgb);
+	pgb.cpt_content = CPT_CONTENT_VOID;
+	pgb.cpt_start = pa->start;
+	pgb.cpt_end = pa->end;
+	ctx->write(&pgb, sizeof(pgb), ctx);
+
+	ctx->write(pa->list, 8*((pa->end-pa->start)/PAGE_SIZE), ctx);	/* one 8-byte pa->list[] entry per page of the run */
+
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+
+static int can_expand(struct page_area *pa, struct page_desc *pd)
+{
+	if (pa->start == pa->end)	/* an empty area accepts any page */
+		return 1;
+	if (pa->type != pd->type)
+		return 0;
+	if (pa->type == PD_ITER || pa->type == PD_ITERYOUNG) {
+		if (pa->end - pa->start >= PAGE_SIZE*16)	/* pa->list[] has room for 16 entries only */
+			return 0;
+		pa->list[(pa->end - pa->start)/PAGE_SIZE] = pd->index;	/* side effect: records the index of the page being appended */
+	}
+	if (pa->type == PD_ABSENT)
+		return pd->index == pa->pgoff + 1;	/* absent runs need consecutive page offsets */
+	if (pa->type == PD_CLONE)
+		return pd->mm == pa->mm;	/* cloned runs need one common source mm */
+	return 1;
+}
+
+static int dump_one_vma(cpt_object_t *mmobj,
+			struct vm_area_struct *vma, struct cpt_context *ctx)
+{
+	struct cpt_vma_image *v = cpt_get_buf(ctx);
+	unsigned long addr;
+	loff_t saved_object;
+	struct cpt_page_block pgb;
+	struct page_area pa;	/* run of same-typed pages currently being accumulated */
+	int cloned_pages = 0;	/* non-zero makes cpt_anonvma be patched to 1 at the end */
+
+	cpt_push_object(&saved_object, ctx);
+
+	v->cpt_object = CPT_OBJ_VMA;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_start = vma->vm_start;
+	v->cpt_end = vma->vm_end;
+	v->cpt_flags = vma->vm_flags;
+	if (vma->vm_flags&VM_HUGETLB) {
+		eprintk_ctx("huge TLB VMAs are still not supported\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;	/* NOTE(review): returns without cpt_pop_object() */
+	}
+	v->cpt_pgprot = vma->vm_page_prot.pgprot;
+	v->cpt_pgoff = vma->vm_pgoff;
+	v->cpt_file = CPT_NULL;
+	v->cpt_type = CPT_VMA_TYPE_0;
+	v->cpt_anonvma = 0;	/* may be rewritten in place below */
+
+	/* We have to remember what VMAs are bound to one anon_vma.
+	 * So, we store an identifier of group of VMAs. It is handy
+	 * to use absolute address of anon_vma as this identifier. */
+	v->cpt_anonvmaid = (unsigned long)vma->anon_vma;
+
+	if (vma->vm_file) {
+		struct file *filp;
+		cpt_object_t *obj = lookup_cpt_object(CPT_OBJ_FILE, vma->vm_file, ctx);
+		if (obj == NULL) BUG();	/* collect_one_mm() added every vm_file as an object */
+		filp = obj->o_obj;
+		if (filp->f_op &&
+		    filp->f_op->read == NULL &&
+		    filp->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_TMPFS)	/* tmpfs file without a read method is tagged as SHM */
+			v->cpt_type = CPT_VMA_TYPE_SHM;
+		v->cpt_file = obj->o_pos;
+	}
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	pa.type = PD_ABSENT;	/* start with an empty run at vm_start */
+	pa.pgoff = vma->vm_pgoff;
+	pa.mm = CPT_NULL;
+	pa.start = vma->vm_start;
+	pa.end = vma->vm_start;
+
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+		struct page_desc pd;
+
+		page_get_desc(mmobj, vma, addr, &pd, ctx);
+		cloned_pages += pd.shared;
+
+		if (pd.type == PD_FUNKEY) {
+			eprintk_ctx("dump_one_vma: funkey page\n");
+			return -EINVAL;	/* NOTE(review): returns without cpt_close_object()/cpt_pop_object() */
+		}
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+		if (pd.type == PD_LAZY &&
+		    (ctx->lazy_vm == 0 || (vma->vm_flags&VM_LOCKED)))	/* lazy transfer off or vma locked: copy now */
+			pd.type = PD_COPY;
+#else
+		if (pd.type == PD_LAZY)	/* no lazy support compiled in: always copy */
+			pd.type = PD_COPY;
+#endif
+
+		if (!can_expand(&pa, &pd)) {	/* page does not fit the current run: flush it and start a new one */
+			if (pa.type == PD_COPY ||
+			    pa.type == PD_ZERO) {
+				pgb.cpt_start = pa.start;
+				pgb.cpt_end = pa.end;
+				dump_page_block(vma, &pgb, pa.type, ctx);
+			} else if (pa.type == PD_CLONE) {
+				dump_copypage_block(vma, &pa, ctx);
+				cloned_pages++;
+			} else if (pa.type == PD_LAZY) {
+				dump_lazypage_block(vma, &pa, ctx);
+			} else if (pa.type == PD_ITER || pa.type == PD_ITERYOUNG) {
+				dump_iterpage_block(vma, &pa, ctx);
+				cloned_pages++;
+			} else if (pa.type == PD_ABSENT &&
+				   pa.pgoff != (pa.end - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff - 1) {	/* absent run whose offset differs from the linear one */
+				dump_remappage_block(vma, &pa, ctx);
+			}
+			pa.start = addr;
+		}
+		pa.type = pd.type;	/* append the page to the (possibly new) run */
+		pa.end = addr + PAGE_SIZE;
+		pa.pgoff = pd.index;
+		if (addr == pa.start)
+			pa.list[0] = pd.index;	/* can_expand() only fills list[] for later pages */
+		pa.mm = pd.mm;
+	}
+
+	if (pa.end > pa.start) {	/* flush the last run, same dispatch as inside the loop */
+		if (pa.type == PD_COPY ||
+		    pa.type == PD_ZERO) {
+			pgb.cpt_start = pa.start;
+			pgb.cpt_end = pa.end;
+			dump_page_block(vma, &pgb, pa.type, ctx);
+		} else if (pa.type == PD_CLONE) {
+			dump_copypage_block(vma, &pa, ctx);
+			cloned_pages++;
+		} else if (pa.type == PD_LAZY) {
+			dump_lazypage_block(vma, &pa, ctx);
+		} else if (pa.type == PD_ITER || pa.type == PD_ITERYOUNG) {
+			dump_iterpage_block(vma, &pa, ctx);
+			cloned_pages++;
+		} else if (pa.type == PD_ABSENT &&
+			   pa.pgoff != (pa.end - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff - 1) {
+			dump_remappage_block(vma, &pa, ctx);
+		}
+	}
+
+	if (cloned_pages) {
+		__u32 anonvma = 1;
+		loff_t anonpos = ctx->current_object + offsetof(struct cpt_vma_image, cpt_anonvma);	/* position of cpt_anonvma in the header written above */
+		ctx->pwrite(&anonvma, 4, ctx, anonpos);	/* patch the field in place */
+	}
+
+	cpt_close_object(ctx);
+
+	cpt_pop_object(&saved_object, ctx);
+
+	return 0;
+}
+
+static int dump_one_aio_ctx(struct mm_struct *mm, struct kioctx *aio_ctx,
+			    cpt_context_t *ctx)
+{
+	loff_t saved_object;
+	struct cpt_aio_ctx_image aimg;
+
+	if (!list_empty(&aio_ctx->run_list) ||	/* same idle test as the final one in collect_one_aio_ctx() */
+	    !list_empty(&aio_ctx->active_reqs) ||
+	    aio_ctx->reqs_active) {
+		eprintk_ctx("AIO is active after suspend\n");
+		return -EBUSY;
+	}
+
+	cpt_push_object(&saved_object, ctx);
+
+	aimg.cpt_next = CPT_ALIGN(sizeof(aimg));	/* set by hand: no cpt_close_object() is called for this record */
+	aimg.cpt_object = CPT_OBJ_AIO_CONTEXT;
+	aimg.cpt_hdrlen = sizeof(aimg);
+	aimg.cpt_content = CPT_CONTENT_ARRAY;
+
+	aimg.cpt_max_reqs = aio_ctx->max_reqs;
+	aimg.cpt_ring_pages = aio_ctx->ring_info.nr_pages;	/* the remaining fields are copied from ring_info */
+	aimg.cpt_nr = aio_ctx->ring_info.nr;
+	aimg.cpt_tail = aio_ctx->ring_info.tail;
+	aimg.cpt_mmap_base = aio_ctx->ring_info.mmap_base;
+
+	ctx->write(&aimg, sizeof(aimg), ctx);
+
+	cpt_pop_object(&saved_object, ctx);
+	return 0;
+}
+
+/* Dump one mm: the cpt_mm_image header, the LDT (x86 only, if any), every
+ * vma and every AIO context.  Returns 0 or the first error encountered. */
+static int dump_one_mm(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct mm_struct *mm = obj->o_obj;
+	struct vm_area_struct *vma;
+	struct cpt_mm_image *v = cpt_get_buf(ctx);
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = -1;
+	v->cpt_object = CPT_OBJ_MM;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_start_code = mm->start_code;
+	v->cpt_end_code = mm->end_code;
+	v->cpt_start_data = mm->start_data;
+	v->cpt_end_data = mm->end_data;
+	v->cpt_start_brk = mm->start_brk;
+	v->cpt_brk = mm->brk;
+	v->cpt_start_stack = mm->start_stack;
+	v->cpt_start_arg = mm->arg_start;
+	v->cpt_end_arg = mm->arg_end;
+	v->cpt_start_env = mm->env_start;
+	v->cpt_end_env = mm->env_end;
+	v->cpt_def_flags = mm->def_flags;
+#ifdef CONFIG_USER_RESOURCE
+	v->cpt_mmub = cpt_lookup_ubc(mm->mm_ub, ctx);
+#endif
+	v->cpt_dumpable = mm->dumpable;
+	v->cpt_vps_dumpable = mm->vps_dumpable;
+	v->cpt_used_hugetlb = 0; /* not used */
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+#ifdef CONFIG_X86
+	if (mm->context.size) {
+		loff_t saved_object;
+		struct cpt_obj_bits b;
+		int size = mm->context.size*LDT_ENTRY_SIZE;	/* LDT size in bytes */
+
+		dprintk_ctx("nontrivial LDT\n");
+		cpt_push_object(&saved_object, ctx);
+		cpt_open_object(NULL, ctx);
+		b.cpt_next = CPT_NULL;
+		b.cpt_object = CPT_OBJ_BITS;
+		b.cpt_hdrlen = sizeof(b);
+		b.cpt_content = CPT_CONTENT_MM_CONTEXT;
+		b.cpt_size = size;
+
+		ctx->write(&b, sizeof(b), ctx);
+#if defined(CONFIG_X86_64) || defined(CONFIG_XEN) || \
+			LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
+		ctx->write(mm->context.ldt, size, ctx);
+#else
+		{
+			int i;	/* byte offset into the LDT; was used undeclared */
+
+			for (i = 0; i < size; i += PAGE_SIZE) {
+				int nr = i / PAGE_SIZE, bytes;
+				char *kaddr = kmap(mm->context.ldt_pages[nr]);
+
+				bytes = size - i;
+				if (bytes > PAGE_SIZE)
+					bytes = PAGE_SIZE;
+				ctx->write(kaddr, bytes, ctx);
+				kunmap(mm->context.ldt_pages[nr]);
+			}
+		}
+#endif
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_object, ctx);
+	}
+#endif
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		int err;
+
+#ifdef CONFIG_X86_64
+		if (vma->vm_start == 0xFFFFE000 &&
+		    vma->vm_end == 0xFFFFF000)
+			continue;
+#endif
+
+		if ((err = dump_one_vma(obj, vma, ctx)) != 0)
+			return err;
+	}
+
+	if (mm->ioctx_list) {
+		struct kioctx *aio_ctx;
+		int err;
+
+		for (aio_ctx = mm->ioctx_list; aio_ctx; aio_ctx = aio_ctx->next)
+			if ((err = dump_one_aio_ctx(mm, aio_ctx, ctx)) != 0)
+				return err;
+	}
+
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+/* Dump every collected mm into the CPT_SECT_MM section of the image. */
+int cpt_dump_vm(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err;
+
+	/* Per-dump page statistics start from zero, ucnt included. */
+	scnt = scnt0 = zcnt = ucnt = 0;
+
+	cpt_open_section(ctx, CPT_SECT_MM);
+
+	for_each_object(obj, CPT_OBJ_MM) {
+		if ((err = dump_one_mm(obj, ctx)) != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	if (scnt)
+		dprintk_ctx("cpt_dump_vm: %d shared private anon pages\n", scnt);
+	if (scnt0)
+		dprintk_ctx("cpt_dump_vm: %d anon pages are cloned\n", scnt0);
+	if (zcnt)
+		dprintk_ctx("cpt_dump_vm: %d silly pages canceled\n", zcnt);
+	return 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_mm.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_mm.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_mm.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_mm.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,22 @@
+int cpt_collect_mm(cpt_context_t *);
+
+int cpt_dump_vm(struct cpt_context *ctx);
+
+__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int cpt_mm_prepare(unsigned long veid);
+
+int cpt_free_pgin_dir(struct cpt_context *);
+int cpt_start_pagein(struct cpt_context *);
+int rst_setup_pagein(struct cpt_context *);
+int rst_complete_pagein(struct cpt_context *, int);
+int rst_pageind(struct cpt_context *);
+int cpt_iteration(cpt_context_t *ctx);
+int rst_iteration(cpt_context_t *ctx);
+void rst_drop_iter_dir(cpt_context_t *ctx);
+int rst_iter(struct vm_area_struct *vma, u64 pfn,
+	     unsigned long addr, cpt_context_t * ctx);
+
+int rst_swapoff(struct cpt_context *);
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_net.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_net.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_net.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_net.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,460 @@
+/*
+ *
+ *  kernel/cpt/cpt_net.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/rtnetlink.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+#include <linux/nfcalls.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+#include "cpt_syscalls.h"
+
+int cpt_dump_link(struct cpt_context * ctx)
+{
+	struct net_device *dev;
+	/* Write one CPT_OBJ_NET_DEVICE image per device on dev_base; 0 on success, -EBUSY on an unsupported device. */
+	cpt_open_section(ctx, CPT_SECT_NET_DEVICE);
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct cpt_netdev_image v;
+		/* NOTE(review): v is written out whole; confirm every field of cpt_netdev_image is assigned below. */
+		cpt_open_object(NULL, ctx);
+
+		v.cpt_next = CPT_NULL;
+		v.cpt_object = CPT_OBJ_NET_DEVICE;
+		v.cpt_hdrlen = sizeof(v);
+		v.cpt_content = CPT_CONTENT_VOID;
+
+		v.cpt_index = dev->ifindex;
+		v.cpt_flags = dev->flags;
+		memcpy(v.cpt_name, dev->name, IFNAMSIZ);
+		ctx->write(&v, sizeof(v), ctx);
+		cpt_close_object(ctx);
+		/* The image is already written; now reject anything but the VE loopback and (if configured) veth/venet. */
+		if (dev != get_exec_env()->_loopback_dev
+#if defined(CONFIG_VE_ETHDEV) || defined(CONFIG_VE_ETHDEV_MODULE)
+		    && !(KSYMREF(veth_open) && dev->open == KSYMREF(veth_open))
+#endif
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+		     && dev != get_exec_env()->_venet_dev
+#endif
+							) {
+			eprintk_ctx("unsupported netdevice %s\n", dev->name);
+			cpt_close_section(ctx);
+			return -EBUSY;
+		}
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+int cpt_suspend_network(struct cpt_context *ctx)
+{
+	get_exec_env()->disable_net = 1;	/* flags the current exec environment; ctx is not consulted */
+	synchronize_net();			/* runs after the flag is set, before returning */
+	return 0;				/* always succeeds */
+}
+
+int cpt_resume_network(struct cpt_context *ctx)
+{
+	struct ve_struct *ve = get_ve_by_id(ctx->ve_id);
+
+	if (ve == NULL)
+		return -ESRCH;	/* no environment with the context's ve_id */
+	ve->disable_net = 0;
+	put_ve(ve);		/* pairs with get_ve_by_id() above */
+	return 0;
+}
+
+int cpt_dump_ifaddr(struct cpt_context * ctx)
+{
+	struct net_device *dev;
+	/* Dump every IPv4 (and, if configured, IPv6) interface address as a CPT_OBJ_NET_IFADDR image; always returns 0. */
+	cpt_open_section(ctx, CPT_SECT_NET_IFADDR);
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct in_device *idev = in_dev_get(dev);	/* paired with in_dev_put() below */
+		struct in_ifaddr *ifa;
+
+		if (!idev)
+			continue;
+
+		for (ifa = idev->ifa_list; ifa; ifa = ifa->ifa_next) {
+			struct cpt_ifaddr_image v;
+			cpt_open_object(NULL, ctx);
+
+			v.cpt_next = CPT_NULL;
+			v.cpt_object = CPT_OBJ_NET_IFADDR;
+			v.cpt_hdrlen = sizeof(v);
+			v.cpt_content = CPT_CONTENT_VOID;
+
+			v.cpt_index = dev->ifindex;
+			v.cpt_family = AF_INET;
+			v.cpt_masklen = ifa->ifa_prefixlen;
+			v.cpt_flags = ifa->ifa_flags;
+			v.cpt_scope = ifa->ifa_scope;
+			memset(&v.cpt_address, 0, sizeof(v.cpt_address));
+			memset(&v.cpt_peer, 0, sizeof(v.cpt_peer));
+			memset(&v.cpt_broadcast, 0, sizeof(v.cpt_broadcast));
+			v.cpt_address[0] = ifa->ifa_local;	/* IPv4 values occupy element 0; the rest stays zeroed */
+			v.cpt_peer[0] = ifa->ifa_address;
+			v.cpt_broadcast[0] = ifa->ifa_broadcast;
+			memcpy(v.cpt_label, ifa->ifa_label, IFNAMSIZ);
+			ctx->write(&v, sizeof(v), ctx);
+			cpt_close_object(ctx);
+		}
+		in_dev_put(idev);
+	}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct inet6_dev *idev = in6_dev_get(dev);	/* paired with in6_dev_put() below */
+		struct inet6_ifaddr *ifa;
+
+		if (!idev)
+			continue;
+
+		for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
+			struct cpt_ifaddr_image v;
+			/* Skip ::1/128 on loopback_dev. NOTE(review): cpt_dump_link() tests get_exec_env()->_loopback_dev instead - confirm. */
+			if (dev == &loopback_dev &&
+			    ifa->prefix_len == 128 &&
+			    ifa->addr.s6_addr32[0] == 0 &&
+			    ifa->addr.s6_addr32[1] == 0 &&
+			    ifa->addr.s6_addr32[2] == 0 &&
+			    ifa->addr.s6_addr32[3] == htonl(1))
+				continue;
+
+			cpt_open_object(NULL, ctx);
+
+			v.cpt_next = CPT_NULL;
+			v.cpt_object = CPT_OBJ_NET_IFADDR;
+			v.cpt_hdrlen = sizeof(v);
+			v.cpt_content = CPT_CONTENT_VOID;
+
+			v.cpt_index = dev->ifindex;
+			v.cpt_family = AF_INET6;
+			v.cpt_masklen = ifa->prefix_len;
+			v.cpt_flags = ifa->flags;
+			v.cpt_scope = ifa->scope;
+			memcpy(&v.cpt_address, &ifa->addr, 16);
+			memcpy(&v.cpt_peer, &ifa->addr, 16);	/* the peer is recorded as the address itself */
+			memset(&v.cpt_broadcast, 0, sizeof(v.cpt_broadcast));
+			memcpy(v.cpt_label, dev->name, IFNAMSIZ);	/* the device name is used as the label */
+			ctx->write(&v, sizeof(v), ctx);
+			cpt_close_object(ctx);
+		}
+		in6_dev_put(idev);
+	}
+#endif
+	cpt_close_section(ctx);
+	return 0;
+}
+
+static int cpt_dump_route(struct cpt_context * ctx)
+{
+	int err;
+	struct socket *sock;
+	struct msghdr msg;
+	struct iovec iov;
+	struct {
+		struct nlmsghdr nlh;
+		struct rtgenmsg g;
+	} req;
+	struct sockaddr_nl nladdr;
+	struct cpt_object_hdr v;
+	mm_segment_t oldfs;
+	char *pg;
+	/* Dump routes: send RTM_GETROUTE on a kernel netlink socket and copy each RTM_NEWROUTE reply into the image. */
+	err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock);
+	if (err)
+		return err;
+
+	memset(&nladdr, 0, sizeof(nladdr));
+	nladdr.nl_family = AF_NETLINK;
+
+	req.nlh.nlmsg_len = sizeof(req);
+	req.nlh.nlmsg_type = RTM_GETROUTE;
+	req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
+	req.nlh.nlmsg_pid = 0;
+	req.g.rtgen_family = AF_INET;	/* first pass; switched to AF_INET6 below when IPv6 is configured */
+
+	iov.iov_base=&req;
+	iov.iov_len=sizeof(req);
+	msg.msg_name=&nladdr;
+	msg.msg_namelen=sizeof(nladdr);
+	msg.msg_iov=&iov;
+	msg.msg_iovlen=1;
+	msg.msg_control=NULL;
+	msg.msg_controllen=0;
+	msg.msg_flags=MSG_DONTWAIT;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);	/* the buffers handed to the socket call are kernel memory */
+	err = sock_sendmsg(sock, &msg, sizeof(req));
+	set_fs(oldfs);
+
+	if (err < 0)
+		goto out_sock;
+
+	pg = (char*)__get_free_page(GFP_KERNEL);	/* one-page receive buffer, freed at out_sock_pg */
+	if (pg == NULL) {
+		err = -ENOMEM;
+		goto out_sock;
+	}
+
+	cpt_open_section(ctx, CPT_SECT_NET_ROUTE);
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NET_ROUTE;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NLMARRAY;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+restart:
+#endif
+	for (;;) {
+		struct nlmsghdr *h;
+
+		iov.iov_base = pg;
+		iov.iov_len = PAGE_SIZE;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT);
+		set_fs(oldfs);
+		/* NOTE(review): both exits below jump past cpt_close_object()/cpt_close_section() - confirm that is intended. */
+		if (err < 0)
+			goto out_sock_pg;
+		if (msg.msg_flags & MSG_TRUNC) {
+			err = -ENOBUFS;
+			goto out_sock_pg;
+		}
+
+		h = (struct nlmsghdr*)pg;
+		while (NLMSG_OK(h, err)) {	/* err doubles as the count of bytes still unparsed */
+			if (h->nlmsg_type == NLMSG_DONE) {
+				err = 0;
+				goto done;
+			}
+			if (h->nlmsg_type == NLMSG_ERROR) {
+				struct nlmsgerr *errm = (struct nlmsgerr*)NLMSG_DATA(h);
+				err = errm->error;
+				eprintk_ctx("NLMSG error: %d\n", errm->error);
+				goto done;
+			}
+			if (h->nlmsg_type != RTM_NEWROUTE) {
+				eprintk_ctx("NLMSG: %d\n", h->nlmsg_type);
+				err = -EINVAL;
+				goto done;
+			}
+			ctx->write(h, NLMSG_ALIGN(h->nlmsg_len), ctx);	/* message is copied verbatim, padded to netlink alignment */
+			h = NLMSG_NEXT(h, err);
+		}
+		if (err) {	/* leftover bytes that do not form a complete message */
+			eprintk_ctx("!!!Remnant of size %d %d %d\n", err, h->nlmsg_len, h->nlmsg_type);
+			err = -EINVAL;
+			break;
+		}
+	}
+done:
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	if (!err && req.g.rtgen_family == AF_INET) {	/* IPv4 pass finished cleanly: repeat the request for IPv6 */
+		req.g.rtgen_family = AF_INET6;
+		iov.iov_base=&req;
+		iov.iov_len=sizeof(req);
+		msg.msg_name=&nladdr;
+		msg.msg_namelen=sizeof(nladdr);
+		msg.msg_iov=&iov;
+		msg.msg_iovlen=1;
+		msg.msg_control=NULL;
+		msg.msg_controllen=0;
+		msg.msg_flags=MSG_DONTWAIT;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_sendmsg(sock, &msg, sizeof(req));
+		set_fs(oldfs);
+
+		if (err > 0)
+			goto restart;
+	}
+#endif
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	cpt_close_section(ctx);
+
+out_sock_pg:
+	free_page((unsigned long)pg);
+out_sock:
+	sock_release(sock);
+	return err;	/* 0 on success, otherwise the error recorded above */
+}
+
+static int dumpfn(void *arg)
+{
+	int i;
+	int *pfd = arg;	/* the int pfd[2] pipe pair handed over by cpt_dump_iptables() */
+	char *argv[] = { "iptables-save", "-c", NULL };
+	/* Thread body: enter the VE, make the pipe's write end fd 1, close the rest, exec iptables-save. */
+	i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
+	if (i < 0) {
+		eprintk("cannot enter ve to dump iptables\n");
+		module_put(THIS_MODULE);
+		return 255 << 8;
+	}
+
+	if (pfd[1] != 1)
+		sc_dup2(pfd[1], 1);
+
+	for (i=0; i<current->files->fdt->max_fds; i++) {
+		if (i != 1)	/* keep only fd 1, which now refers to the pipe */
+			sc_close(i);
+	}
+	/* presumably balances a module reference taken when the thread was created - TODO confirm */
+	module_put(THIS_MODULE);
+
+	set_fs(KERNEL_DS);	/* the path and argv passed to sc_execve() are kernel pointers */
+	i = sc_execve("/sbin/iptables-save", argv, NULL);
+	if (i == -ENOENT)
+		i = sc_execve("/usr/sbin/iptables-save", argv, NULL);	/* second location tried only when the first is missing */
+	eprintk("failed to exec iptables-save: %d\n", i);	/* reached only if sc_execve() returned */
+	return 255 << 8;
+}
+
+
+static int cpt_dump_iptables(struct cpt_context * ctx)
+{
+	int err;
+	int pid;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	char buf[16];
+	loff_t pos;
+	int n;
+	int status;
+	mm_segment_t oldfs;
+	sigset_t ignore, blocked;
+	/* Capture the output of an "iptables-save -c" helper thread into CPT_SECT_NET_IPTABLES; 0 or a negative error. */
+	if (!(get_exec_env()->_iptables_modules & VE_IP_IPTABLES_MOD))
+		return 0;
+
+	err = sc_pipe(pfd);
+	if (err < 0) {
+		eprintk_ctx("sc_pipe: %d\n", err);
+		return err;
+	}
+	siginitset(&ignore, CPT_SIG_IGNORE_MASK);	/* unlike a bare sig[0] store, also zeroes the remaining words */
+	sigprocmask(SIG_BLOCK, &ignore, &blocked);
+	err = pid = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0);
+	if (err < 0) {
+		eprintk_ctx("local_kernel_thread: %d\n", err);
+		goto out;
+	}
+	f = fget(pfd[0]);	/* keep the read end as a struct file; both descriptors are closed next */
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	cpt_open_section(ctx, CPT_SECT_NET_IPTABLES);
+
+	cpt_open_object(NULL, ctx);
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_NAME;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_NAME;
+
+	ctx->write(&v, sizeof(v), ctx);
+
+	pos = ctx->file->f_pos;	/* remembered to detect below whether any payload was written */
+	do {
+		oldfs = get_fs(); set_fs(KERNEL_DS);	/* buf is a kernel buffer */
+		n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos);
+		set_fs(oldfs);
+		if (n > 0)
+			ctx->write(buf, n, ctx);
+	} while (n > 0);
+
+	if (n < 0)
+		eprintk_ctx("read: %d\n", n);
+
+	fput(f);
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);	/* &status is a kernel address */
+	if ((err = sc_waitx(pid, 0, &status)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+	else if ((status & 0x7f) == 0) {	/* low 7 bits clear: the helper exited rather than being killed */
+		err = (status & 0xff00) >> 8;
+		if (err != 0) {
+			eprintk_ctx("iptables-save exited with %d\n", err);
+			err = -EINVAL;
+		}
+	} else {
+		eprintk_ctx("iptables-save terminated\n");
+		err = -EINVAL;
+	}
+	set_fs(oldfs);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);	/* restore the caller's signal mask */
+
+	if (ctx->file->f_pos != pos) {
+		buf[0] = 0;	/* terminate the captured text with a NUL byte */
+		ctx->write(buf, 1, ctx);
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_close_section(ctx);
+	} else {	/* nothing captured: unregister the section and rewind the image file */
+		pos = ctx->current_section;
+		cpt_close_object(ctx);
+		cpt_close_section(ctx);
+		ctx->sections[CPT_SECT_NET_IPTABLES] = CPT_NULL;
+		ctx->file->f_pos = pos;
+	}
+	return n ? : err;	/* a read error takes precedence over the helper's status */
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+	return err;
+}
+
+int cpt_dump_ifinfo(struct cpt_context * ctx)
+{
+	int rc;
+
+	/* Devices and addresses are dumped under rtnl_lock; routes and iptables follow without it. */
+	rtnl_lock();
+	rc = cpt_dump_link(ctx);
+	if (rc == 0)
+		rc = cpt_dump_ifaddr(ctx);
+	rtnl_unlock();
+	if (rc != 0)
+		return rc;
+	rc = cpt_dump_route(ctx);
+	return rc ? rc : cpt_dump_iptables(ctx);
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_net.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_net.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_net.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_net.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,7 @@
+int cpt_dump_ifinfo(struct cpt_context *ctx);
+int rst_restore_net(struct cpt_context *ctx);
+int cpt_suspend_network(struct cpt_context *ctx);
+int cpt_resume_network(struct cpt_context *ctx);
+int rst_resume_network(struct cpt_context *ctx);
+int cpt_dump_ip_conntrack(struct cpt_context *ctx);
+int rst_restore_ip_conntrack(struct cpt_context * ctx);
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_obj.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_obj.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_obj.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_obj.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,163 @@
+/*
+ *
+ *  kernel/cpt/cpt_obj.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx)
+{
+	cpt_object_t *obj = kmalloc(sizeof(*obj), gfp);
+	/* Returns a fresh, unlinked object with o_count 1 and counts it in ctx->objcount; NULL if kmalloc() fails. */
+	if (obj == NULL)
+		return NULL;
+	INIT_LIST_HEAD(&obj->o_list);
+	INIT_LIST_HEAD(&obj->o_hash);
+	INIT_LIST_HEAD(&obj->o_alist);
+	obj->o_count = 1;
+	obj->o_pos = CPT_NULL;
+	obj->o_ppos = CPT_NULL;	/* previously left as uninitialised kmalloc() memory */
+	obj->o_lock = 0;
+	obj->o_parent = NULL;
+	obj->o_index = CPT_NOINDEX;
+	obj->o_obj = NULL;
+	obj->o_image = NULL;
+	ctx->objcount++;
+	return obj;
+}
+
+void free_cpt_object(cpt_object_t *obj, cpt_context_t *ctx)
+{
+	ctx->objcount--;		/* undoes the increment made by alloc_cpt_object() */
+	list_del(&obj->o_alist);	/* o_list is not touched here; the caller unlinks it */
+	kfree(obj);
+}
+
+void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_context_t *ctx)
+{
+	list_add_tail(&obj->o_list, &ctx->object_array[type]);	/* append to the tail of the per-type list */
+}
+
+void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj,
+			cpt_object_t *head, cpt_context_t *ctx)
+{
+	list_add(&obj->o_list, &head->o_list);	/* link obj right after head; type and ctx are unused */
+}
+
+cpt_object_t * __cpt_object_add(enum _cpt_object_type type, void *p,
+		unsigned gfp_mask, cpt_context_t *ctx)
+{
+	cpt_object_t *entry = lookup_cpt_object(type, p, ctx);
+
+	/* Already tracked: bump its use count and hand the same entry back. */
+	if (entry != NULL) {
+		entry->o_count++;
+		return entry;
+	}
+
+	entry = alloc_cpt_object(gfp_mask, ctx);
+	if (entry == NULL)
+		return NULL;
+	/* A NULL @p leaves o_obj at the NULL set by alloc_cpt_object(). */
+	if (p != NULL)
+		cpt_obj_setobj(entry, p, ctx);
+	intern_cpt_object(type, entry, ctx);
+	return entry;
+}
+
+cpt_object_t * cpt_object_add(enum _cpt_object_type type, void *p, cpt_context_t *ctx)
+{
+	return __cpt_object_add(type, p, GFP_KERNEL, ctx);	/* same as __cpt_object_add() with GFP_KERNEL allocation */
+}
+
+cpt_object_t * cpt_object_get(enum _cpt_object_type type, void *p, cpt_context_t *ctx)
+{
+	cpt_object_t *found = lookup_cpt_object(type, p, ctx);
+
+	if (found == NULL)
+		return NULL;
+
+	/* Unlike __cpt_object_add(), a miss never allocates; a hit bumps o_count. */
+	found->o_count++;
+	return found;
+}
+
+int cpt_object_init(cpt_context_t *ctx)
+{
+	int type = CPT_OBJ_MAX;
+
+	/* Start every per-type object list out empty. */
+	while (type-- > 0)
+		INIT_LIST_HEAD(&ctx->object_array[type]);
+	return 0;
+}
+
+int cpt_object_destroy(cpt_context_t *ctx)
+{
+	int i;
+	/* Free every object on every per-type list together with its image; always returns 0. */
+	for (i=0; i<CPT_OBJ_MAX; i++) {
+		while (!list_empty(&ctx->object_array[i])) {
+			struct list_head *head = ctx->object_array[i].next;
+			cpt_object_t *obj = list_entry(head, cpt_object_t, o_list);
+			list_del(head);
+			/* kfree(NULL) is a no-op, so o_image needs no NULL guard. */
+			kfree(obj->o_image);
+			free_cpt_object(obj, ctx);
+		}
+	}
+	if (ctx->objcount != 0)	/* some allocated object was never linked on a per-type list */
+		eprintk_ctx("BUG: ctx->objcount=%d\n", ctx->objcount);
+	return 0;
+}
+
+cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx)
+{
+	cpt_object_t *cur;
+
+	list_for_each_entry(cur, &ctx->object_array[type], o_list)
+		if (cur->o_obj == p)
+			return cur;
+	/* no object of this type has o_obj equal to @p */
+	return NULL;
+}
+
+cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx)
+{
+	cpt_object_t *cur;
+
+	list_for_each_entry(cur, &ctx->object_array[type], o_list)
+		if (cur->o_pos == pos)
+			return cur;
+	/* no object of this type has o_pos equal to @pos */
+	return NULL;
+}
+
+cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx)
+{
+	cpt_object_t *cur;
+
+	list_for_each_entry(cur, &ctx->object_array[type], o_list)
+		if (cur->o_index == index)
+			return cur;
+	/* no object of this type has o_index equal to @index */
+	return NULL;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_obj.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_obj.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_obj.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_obj.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,62 @@
+#ifndef __CPT_OBJ_H_
+#define __CPT_OBJ_H_ 1
+
+#include <linux/list.h>
+#include <linux/cpt_image.h>
+
+typedef struct _cpt_object
+{
+	struct list_head	o_list;		/* link in ctx->object_array[type] */
+	struct list_head	o_hash;
+	int			o_count;	/* starts at 1; bumped when the same object is added or got again */
+	int			o_index;	/* key for lookup_cpt_obj_byindex(); CPT_NOINDEX until set */
+	int			o_lock;
+	loff_t			o_pos;		/* key for lookup_cpt_obj_bypos(); CPT_NULL until set */
+	loff_t			o_ppos;
+	void			*o_obj;		/* wrapped pointer; key for lookup_cpt_object() */
+	void			*o_image;	/* kfree()d by cpt_object_destroy() */
+	void			*o_parent;
+	struct list_head	o_alist;	/* unlinked by free_cpt_object() */
+} cpt_object_t;
+
+struct cpt_context;
+/* Walks ctx->object_array[type]; relies on a variable named ctx being in scope at the point of use. */
+#define for_each_object(obj, type) list_for_each_entry(obj, &ctx->object_array[type], o_list)
+
+
+extern cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx);
+extern void free_cpt_object(cpt_object_t *obj, struct cpt_context *ctx);
+
+cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx);
+cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx);
+
+static inline void cpt_obj_setpos(cpt_object_t *cpt, loff_t pos, struct cpt_context *ctx)
+{
+	cpt->o_pos = pos;
+	/* TODO: add to pos hash table; for now only the field is set and ctx is unused */
+}
+
+static inline void cpt_obj_setobj(cpt_object_t *cpt, void *ptr, struct cpt_context *ctx)
+{
+	cpt->o_obj = ptr;
+	/* TODO: add to hash table; for now only the field is set and ctx is unused */
+}
+
+static inline void cpt_obj_setindex(cpt_object_t *cpt, __u32 index, struct cpt_context *ctx)
+{
+	cpt->o_index = index;
+	/* TODO: add to index hash table; for now only the field is set and ctx is unused */
+}
+
+
+extern void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, struct cpt_context *ctx);
+extern void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_object_t *head, struct cpt_context *ctx);
+extern cpt_object_t *cpt_object_add(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+extern cpt_object_t *__cpt_object_add(enum _cpt_object_type type, void *p, unsigned int gfp_mask, struct cpt_context *ctx);
+extern cpt_object_t *cpt_object_get(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
+
+extern int cpt_object_init(struct cpt_context *ctx);
+extern int cpt_object_destroy(struct cpt_context *ctx);
+
+#endif /* __CPT_OBJ_H_ */
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_proc.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_proc.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_proc.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_proc.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,595 @@
+/*
+ *
+ *  kernel/cpt/cpt_proc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_ioctl.h>
+#include <linux/delay.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+
+MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>");
+MODULE_LICENSE("GPL");
+
+/* List of contexts and lock protecting the list */
+static struct list_head cpt_context_list;
+static spinlock_t cpt_context_lock;
+
+static int proc_read(char *buffer, char **start, off_t offset,
+		     int length, int *eof, void *data)
+{
+	off_t pos = 0;
+	off_t begin = 0;	/* position in the whole listing that buffer[0] currently corresponds to */
+	int len = 0;
+	cpt_context_t *ctx;
+	/* read_proc handler installed by init_cpt(): a header line, then one line per context on cpt_context_list. */
+	len += sprintf(buffer, "Ctx      Id       VE       State\n");
+
+	spin_lock(&cpt_context_lock);
+
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		len += sprintf(buffer+len,"%p %08x %-8u %d",
+			       ctx,
+			       ctx->contextid,
+			       ctx->ve_id,
+			       ctx->ctx_state
+			       );
+
+		buffer[len++] = '\n';
+
+		pos = begin+len;
+		if (pos < offset) {	/* everything so far lies before the requested window: discard it */
+			len = 0;
+			begin = pos;
+		}
+		if (pos > offset+length)	/* window filled; stop without signalling EOF */
+			goto done;
+	}
+	*eof = 1;
+
+done:
+	spin_unlock(&cpt_context_lock);
+	*start = buffer + (offset - begin);	/* first byte of the requested window inside buffer */
+	len -= (offset - begin);
+	if(len > length)
+		len = length;
+	if(len < 0)
+		len = 0;
+	return len;	/* number of bytes available at *start, clamped to [0, length] */
+}
+
+void cpt_context_release(cpt_context_t *ctx)
+{
+	list_del(&ctx->ctx_list);
+	spin_unlock(&cpt_context_lock);	/* entered with the lock held; dropped for the teardown below */
+	/* Tear down a context whose last reference is gone: resume if active, release files, free memory. */
+	if (ctx->ctx_state > 0)
+		cpt_resume(ctx);
+	ctx->ctx_state = CPT_CTX_ERROR;
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if (ctx->pgin_task)
+		put_task_struct(ctx->pgin_task);
+	if (ctx->pgin_dir)
+		cpt_free_pgin_dir(ctx);
+	if (ctx->pagein_file_out)
+		fput(ctx->pagein_file_out);
+	if (ctx->pagein_file_in)
+		fput(ctx->pagein_file_in);
+#endif
+	if (ctx->objcount)
+		eprintk_ctx("%d objects leaked\n", ctx->objcount);
+	if (ctx->file)
+		fput(ctx->file);
+	cpt_flush_error(ctx);	/* called while errorfile and error_msg are still attached */
+	if (ctx->errorfile) {
+		fput(ctx->errorfile);
+		ctx->errorfile = NULL;
+	}
+	if (ctx->error_msg) {
+		free_page((unsigned long)ctx->error_msg);
+		ctx->error_msg = NULL;
+	}
+	if (ctx->statusfile)
+		fput(ctx->statusfile);
+	if (ctx->lockfile)
+		fput(ctx->lockfile);
+	kfree(ctx);
+	/* Callers such as __cpt_context_put() expect to still hold the lock on return. */
+	spin_lock(&cpt_context_lock);
+}
+
+static void __cpt_context_put(cpt_context_t *ctx)
+{
+	if (!--ctx->refcount)	/* callers in this file hold cpt_context_lock around this */
+		cpt_context_release(ctx);	/* last reference: frees ctx (drops and re-takes the lock inside) */
+}
+
+static void cpt_context_put(cpt_context_t *ctx)
+{
+	spin_lock(&cpt_context_lock);
+	__cpt_context_put(ctx);	/* locked wrapper around the reference drop */
+	spin_unlock(&cpt_context_lock);
+}
+
+cpt_context_t * cpt_context_open(void)
+{
+	cpt_context_t *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (ctx == NULL)
+		return NULL;
+	cpt_context_init(ctx);
+	spin_lock(&cpt_context_lock);
+	list_add_tail(&ctx->ctx_list, &cpt_context_list);
+	spin_unlock(&cpt_context_lock);
+	ctx->error_msg = (char*)__get_free_page(GFP_KERNEL);	/* may stay NULL; not treated as an error */
+	if (ctx->error_msg)
+		ctx->error_msg[0] = 0;
+	return ctx;
+}
+
+static cpt_context_t * cpt_context_lookup(unsigned int contextid)
+{
+	cpt_context_t *ctx, *found = NULL;
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		if (ctx->contextid != contextid)
+			continue;
+		ctx->refcount++;	/* the reference is handed to the caller */
+		found = ctx;
+		break;
+	}
+	spin_unlock(&cpt_context_lock);
+	return found;
+}
+
+int cpt_context_lookup_veid(unsigned int veid)
+{
+	cpt_context_t *ctx;
+	int busy = 0;	/* becomes 1 if a context for @veid has a positive ctx_state */
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		busy = (ctx->ve_id == veid && ctx->ctx_state > 0);
+		if (busy)
+			break;
+	}
+	spin_unlock(&cpt_context_lock);
+	return busy;
+}
+
+static int cpt_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	cpt_context_t *ctx;
+	struct file *dfile = NULL;
+	int try;
+	/* ioctl entry point of the "cpt" proc file: runs with the BKL dropped, returns 0/positive or -errno. */
+	unlock_kernel();
+
+	if (cmd == CPT_VMPREP) {
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+		err = cpt_mm_prepare(arg);
+#else
+		err = -EINVAL;
+#endif
+		goto out_lock;
+	}
+
+	if (cmd == CPT_TEST_CAPS) {	/* context-free comparison of local CPU flags against @arg */
+		unsigned int src_flags, dst_flags = arg;
+
+		err = 0;
+		src_flags = test_cpu_caps();
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err);
+		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err);
+		goto out_lock;
+	}
+
+	if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) {
+		cpt_context_t *old_ctx;
+
+		ctx = NULL;	/* PUT detaches: the file ends up with no context */
+		if (cmd == CPT_JOIN_CONTEXT) {
+			err = -ENOENT;
+			ctx = cpt_context_lookup(arg);	/* on success the lookup's reference becomes the file's */
+			if (!ctx)
+				goto out_lock;
+		}
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		file->private_data = ctx;
+
+		if (old_ctx) {
+			if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) {
+				old_ctx->sticky = 0;	/* PUT also drops the extra reference taken by CPT_GET_CONTEXT */
+				old_ctx->refcount--;
+			}
+			__cpt_context_put(old_ctx);
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_lock;
+	}
+
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	if (ctx)
+		ctx->refcount++;	/* held for the duration of this call; dropped at out_nosem */
+	spin_unlock(&cpt_context_lock);
+
+	if (!ctx) {	/* first use of this file: create a context on demand */
+		cpt_context_t *old_ctx;
+
+		err = -ENOMEM;
+		ctx = cpt_context_open();
+		if (!ctx)
+			goto out_lock;
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		if (!old_ctx) {
+			ctx->refcount++;
+			file->private_data = ctx;
+		} else {
+			old_ctx->refcount++;
+		}
+		if (old_ctx) {	/* another caller attached a context meanwhile: discard ours and use theirs */
+			__cpt_context_put(ctx);
+			ctx = old_ctx;
+		}
+		spin_unlock(&cpt_context_lock);
+	}
+
+	if (cmd == CPT_GET_CONTEXT) {
+		unsigned int contextid = (unsigned int)arg;
+
+		if (ctx->contextid && ctx->contextid != contextid) {
+			err = -EINVAL;
+			goto out_nosem;
+		}
+		if (!ctx->contextid) {
+			cpt_context_t *c1 = cpt_context_lookup(contextid);
+			if (c1) {	/* the id is already used by another context */
+				cpt_context_put(c1);
+				err = -EEXIST;
+				goto out_nosem;
+			}
+			ctx->contextid = contextid;
+		}
+		spin_lock(&cpt_context_lock);
+		if (!ctx->sticky) {
+			ctx->sticky = 1;	/* extra reference that outlives the file, until CPT_PUT_CONTEXT or module exit */
+			ctx->refcount++;
+		}
+		spin_unlock(&cpt_context_lock);
+		goto out_nosem;
+	}
+
+	down(&ctx->main_sem);	/* the commands below run one at a time per context */
+
+	err = -EBUSY;
+	if (ctx->ctx_state < 0)
+		goto out;
+
+	err = 0;
+	switch (cmd) {
+	case CPT_SET_DUMPFD:
+		if (ctx->ctx_state == CPT_CTX_DUMPING) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {	/* arg is unsigned long; a negative fd detaches the dump file */
+			err = -EBADF;
+			dfile = fget(arg);
+			if (dfile == NULL)
+				break;
+			if (dfile->f_op == NULL ||
+			    dfile->f_op->write == NULL) {
+				fput(dfile);
+				break;
+			}
+			err = 0;
+		}
+		if (ctx->file)
+			fput(ctx->file);
+		ctx->file = dfile;
+		break;
+	case CPT_SET_ERRORFD:
+		if ((int)arg >= 0) {	/* a negative fd detaches the error file */
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->errorfile)
+			fput(ctx->errorfile);
+		ctx->errorfile = dfile;
+		break;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	case CPT_SET_PAGEINFDIN:
+		if ((int)arg >= 0) {	/* a negative fd detaches the page-in input file */
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_in)
+			fput(ctx->pagein_file_in);
+		ctx->pagein_file_in = dfile;
+		break;
+	case CPT_SET_PAGEINFDOUT:
+		if ((int)arg >= 0) {	/* a negative fd detaches the page-in output file */
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_out)
+			fput(ctx->pagein_file_out);
+		ctx->pagein_file_out = dfile;
+		break;
+	case CPT_SET_LAZY:
+		ctx->lazy_vm = arg;
+		break;
+	case CPT_ITER:
+		err = cpt_iteration(ctx);
+		break;
+	case CPT_PAGEIND:
+		err = cpt_start_pagein(ctx);
+		break;
+#endif
+	case CPT_SET_VEID:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->ve_id = arg;
+		break;
+	case CPT_SET_CPU_FLAGS:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->dst_cpu_flags = arg;
+		ctx->src_cpu_flags = test_cpu_caps();
+		break;
+	case CPT_SUSPEND:
+		if (cpt_context_lookup_veid(ctx->ve_id) ||
+		    ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->ctx_state = CPT_CTX_SUSPENDING;
+		try = 0;
+		do {	/* up to three attempts, one second apart, while the suspend reports -EAGAIN */
+			err = cpt_vps_suspend(ctx);
+			if (err)
+				cpt_resume(ctx);
+			if (err == -EAGAIN)
+				msleep(1000);
+			try++;
+		} while (err == -EAGAIN && try < 3);
+		if (err) {
+			ctx->ctx_state = CPT_CTX_IDLE;
+		} else {
+			ctx->ctx_state = CPT_CTX_SUSPENDED;
+		}
+		break;
+	case CPT_DUMP:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		if (!ctx->file) {
+			err = -EBADF;
+			break;
+		}
+		err = cpt_dump(ctx);
+		break;
+	case CPT_RESUME:
+		if (ctx->ctx_state == CPT_CTX_IDLE) {
+			err = -ENOENT;
+			break;
+		}
+		err = cpt_resume(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_KILL:
+		if (ctx->ctx_state == CPT_CTX_IDLE) {
+			err = -ENOENT;
+			break;
+		}
+		err = cpt_kill(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_TEST_VECAPS:
+	{
+		__u32 dst_flags = arg;
+		__u32 src_flags;
+
+		err = cpt_vps_caps(ctx, &src_flags);
+		if (err)
+			break;
+
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_EMT64, "emt64", err);
+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_IA64, "ia64", err);
+		if (src_flags & CPT_UNSUPPORTED_MASK)
+			err = 1;
+		break;
+	}
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+out:
+	cpt_flush_error(ctx);
+	up(&ctx->main_sem);
+out_nosem:
+	cpt_context_put(ctx);	/* drops the per-call reference taken above */
+out_lock:
+	lock_kernel();	/* re-take the BKL dropped on entry */
+	if (err == -ERESTARTSYS || err == -ERESTARTNOINTR ||
+	    err == -ERESTARTNOHAND || err == -ERESTART_RESTARTBLOCK)
+		err = -EINTR;	/* restart codes are reported to the caller as -EINTR */
+	return err;
+}
+
+static int cpt_open(struct inode *inode, struct file *file)
+{
+	/*
+	 * Hold a module reference while the file is open; cpt_release() drops it.
+	 */
+	return try_module_get(THIS_MODULE) ? 0 : -EBUSY;
+}
+
+static int cpt_release(struct inode * inode, struct file * file)
+{
+	cpt_context_t *attached;
+
+	/* Detach the context from the file and drop the file's reference to it. */
+	spin_lock(&cpt_context_lock);
+	attached = file->private_data;
+	file->private_data = NULL;
+	if (attached != NULL)
+		__cpt_context_put(attached);
+	spin_unlock(&cpt_context_lock);
+	/* Balances the try_module_get() done in cpt_open(). */
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+
+static struct file_operations cpt_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = cpt_open,
+	.release = cpt_release,
+	.ioctl	 = cpt_ioctl,
+};	/* read, write and llseek are copied in by init_cpt() from the proc entry's original fops */
+
+static struct proc_dir_entry *proc_ent;
+
+static struct ctl_table_header *ctl_header;
+
+static ctl_table debug_table[] = {
+	{
+		.ctl_name	= 9475,
+		.procname	= "cpt",
+		.data		= &debug_level,	/* integer exposed through this sysctl entry */
+		.maxlen		= sizeof(debug_level),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }	/* terminator */
+};
+static ctl_table root_table[] = {
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table,	/* places the "cpt" entry under "debug" */
+	},
+	{ .ctl_name = 0 }	/* terminator */
+};
+
+static int __init init_cpt(void)
+{
+	int err;
+	/* Module init: register the debug sysctl, then create the "cpt" proc entry and hook cpt_fops into it. */
+	err = -ENOMEM;
+	ctl_header = register_sysctl_table(root_table, 0);
+	if (!ctl_header)
+		goto err_mon;
+
+	spin_lock_init(&cpt_context_lock);
+	INIT_LIST_HEAD(&cpt_context_list);
+
+	err = -EINVAL;
+	proc_ent = create_proc_entry_mod("cpt", 0600, NULL, THIS_MODULE);
+	if (!proc_ent)
+		goto err_out;
+	/* Keep the entry's stock read/write/llseek and add our open/release/ioctl on top. */
+	cpt_fops.read = proc_ent->proc_fops->read;
+	cpt_fops.write = proc_ent->proc_fops->write;
+	cpt_fops.llseek = proc_ent->proc_fops->llseek;
+	proc_ent->proc_fops = &cpt_fops;
+
+	proc_ent->read_proc = proc_read;
+	proc_ent->data = NULL;
+	proc_ent->owner = THIS_MODULE;
+	return 0;
+
+err_out:
+	unregister_sysctl_table(ctl_header);	/* undo the sysctl registration when the proc entry failed */
+err_mon:
+	return err;
+}
+module_init(init_cpt);
+
+static void __exit exit_cpt(void)
+{
+	remove_proc_entry("cpt", NULL);
+	unregister_sysctl_table(ctl_header);
+	/* Release every context still on the list; each is required to hold exactly one reference. */
+	spin_lock(&cpt_context_lock);
+	while (!list_empty(&cpt_context_list)) {
+		cpt_context_t *ctx;
+		ctx = list_entry(cpt_context_list.next, cpt_context_t, ctx_list);
+		/* A non-sticky context gets a reference here so both kinds take the same put path. */
+		if (!ctx->sticky)
+			ctx->refcount++;
+		ctx->sticky = 0;
+
+		BUG_ON(ctx->refcount != 1);
+		/* The put drops the count to zero, so cpt_context_release() unlinks and frees ctx. */
+		__cpt_context_put(ctx);
+	}
+	spin_unlock(&cpt_context_lock);
+}
+module_exit(exit_cpt);
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_process.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_process.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_process.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_process.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,1291 @@
+/*
+ *
+ *  kernel/cpt/cpt_process.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/compat.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_ubc.h"
+#include "cpt_process.h"
+#include "cpt_kernel.h"
+
+#ifdef CONFIG_X86_32
+#undef task_pt_regs
+#define task_pt_regs(t) ((struct pt_regs *)((t)->thread.esp0) - 1)
+#endif
+
+#ifdef CONFIG_X86
+
+static u32 encode_segment(u32 segreg)
+{
+	u32 sel = segreg & 0xFFFF;	/* only the low 16 bits are examined */
+	u32 idx = sel >> 3;		/* selector with its low three bits dropped */
+
+	if (!sel)
+		return CPT_SEG_ZERO;
+	if ((sel & 3) != 3) {
+		wprintk("Invalid RPL of a segment reg %x\n", sel);
+		return CPT_SEG_ZERO;
+	}
+
+	/* Bit 2 set: LDT selector, encoded as its index past CPT_SEG_LDT. */
+	if (sel & 4)
+		return CPT_SEG_LDT + idx;
+
+	/* Index inside the TLS range of the GDT. */
+	if (idx >= GDT_ENTRY_TLS_MIN && idx <= GDT_ENTRY_TLS_MAX)
+		return CPT_SEG_TLS1 + (idx - GDT_ENTRY_TLS_MIN);
+
+	/* Whatever is left has to be one of the standard user selectors. */
+#ifdef CONFIG_X86_64
+	if (sel == __USER32_DS)
+		return CPT_SEG_USER32_DS;
+	if (sel == __USER32_CS)
+		return CPT_SEG_USER32_CS;
+	if (sel == __USER_DS)
+		return CPT_SEG_USER64_DS;
+	if (sel == __USER_CS)
+		return CPT_SEG_USER64_CS;
+#else
+	if (sel == __USER_DS)
+		return CPT_SEG_USER32_DS;
+	if (sel == __USER_CS)
+		return CPT_SEG_USER32_CS;
+#endif
+	wprintk("Invalid segment reg %x\n", sel);
+	return CPT_SEG_ZERO;
+}
+
+#ifdef CONFIG_X86_64
+static void xlate_ptregs_64_to_32(struct cpt_x86_regs *d, struct pt_regs *s, task_t *tsk)
+{
+	d->cpt_eax = s->rax;		/* general purpose registers */
+	d->cpt_ebx = s->rbx;
+	d->cpt_ecx = s->rcx;
+	d->cpt_edx = s->rdx;
+	d->cpt_esi = s->rsi;
+	d->cpt_edi = s->rdi;
+	d->cpt_ebp = s->rbp;
+	d->cpt_esp = s->rsp;
+	d->cpt_eip = s->rip;		/* instruction pointer, flags, syscall number slot */
+	d->cpt_eflags = s->eflags;
+	d->cpt_orig_eax = s->orig_rax;
+	d->cpt_xcs = encode_segment(s->cs);	/* selectors are stored in encoded form */
+	d->cpt_xss = encode_segment(s->ss);
+	d->cpt_xds = encode_segment(tsk->thread.ds);	/* ds/es are read from thread state, not from *s */
+	d->cpt_xes = encode_segment(tsk->thread.es);
+}
+
+static int dump_registers(task_t *tsk, struct cpt_context *ctx)
+{
+	struct pt_regs *regs = task_pt_regs(tsk);
+	struct thread_struct *th = &tsk->thread;
+
+	cpt_open_object(NULL, ctx);
+	if (tsk->thread_info->flags & _TIF_IA32) {
+		/* IA32 task: saved in the 32-bit CPT_OBJ_X86_REGS layout. */
+		struct cpt_x86_regs img;
+
+		img.cpt_next = sizeof(img);
+		img.cpt_object = CPT_OBJ_X86_REGS;
+		img.cpt_hdrlen = sizeof(img);
+		img.cpt_content = CPT_CONTENT_VOID;
+		img.cpt_debugreg[0] = th->debugreg0;
+		img.cpt_debugreg[1] = th->debugreg1;
+		img.cpt_debugreg[2] = th->debugreg2;
+		img.cpt_debugreg[3] = th->debugreg3;
+		img.cpt_debugreg[4] = img.cpt_debugreg[5] = 0;
+		img.cpt_debugreg[6] = th->debugreg6;
+		img.cpt_debugreg[7] = th->debugreg7;
+		img.cpt_fs = encode_segment(th->fsindex);
+		img.cpt_gs = encode_segment(th->gsindex);
+		xlate_ptregs_64_to_32(&img, regs, tsk);
+		ctx->write(&img, sizeof(img), ctx);
+	} else {
+		/* Otherwise: the 64-bit CPT_OBJ_X86_64_REGS layout. */
+		struct cpt_x86_64_regs img;
+
+		img.cpt_next = sizeof(img);
+		img.cpt_object = CPT_OBJ_X86_64_REGS;
+		img.cpt_hdrlen = sizeof(img);
+		img.cpt_content = CPT_CONTENT_VOID;
+
+		img.cpt_fsbase = th->fs;
+		img.cpt_gsbase = th->gs;
+		img.cpt_fsindex = encode_segment(th->fsindex);
+		img.cpt_gsindex = encode_segment(th->gsindex);
+		img.cpt_ds = encode_segment(th->ds);
+		img.cpt_es = encode_segment(th->es);
+		img.cpt_debugreg[0] = th->debugreg0;
+		img.cpt_debugreg[1] = th->debugreg1;
+		img.cpt_debugreg[2] = th->debugreg2;
+		img.cpt_debugreg[3] = th->debugreg3;
+		img.cpt_debugreg[4] = img.cpt_debugreg[5] = 0;
+		img.cpt_debugreg[6] = th->debugreg6;
+		img.cpt_debugreg[7] = th->debugreg7;
+
+		/* The saved frame is copied as one block starting at cpt_r15 ... */
+		memcpy(&img.cpt_r15, regs, sizeof(struct pt_regs));
+		/* ... and cs/ss are then overwritten with their encoded form. */
+		img.cpt_cs = encode_segment(regs->cs);
+		img.cpt_ss = encode_segment(regs->ss);
+
+		ctx->write(&img, sizeof(img), ctx);
+#if 0
+		if (img.cpt_rip >= VSYSCALL_START && img.cpt_rip < VSYSCALL_END) {
+			eprintk_ctx(CPT_FID "cannot be checkpointied while vsyscall, try later\n", CPT_TID(tsk));
+			return -EAGAIN;
+		}
+#endif
+	}
+	cpt_close_object(ctx);
+	return 0;
+}
+
+#else
+
+static int dump_registers(task_t *tsk, struct cpt_context *ctx)
+{
+	struct pt_regs *regs = task_pt_regs(tsk);
+	struct cpt_x86_regs img;
+	int n;
+
+	cpt_open_object(NULL, ctx);
+
+	/* Object header. */
+	img.cpt_object = CPT_OBJ_X86_REGS;
+	img.cpt_content = CPT_CONTENT_VOID;
+	img.cpt_hdrlen = sizeof(img);
+	img.cpt_next = sizeof(img);
+
+	/* All eight debug register slots are copied from thread state. */
+	for (n = 0; n < 8; n++)
+		img.cpt_debugreg[n] = tsk->thread.debugreg[n];
+	img.cpt_fs = encode_segment(tsk->thread.fs);
+	img.cpt_gs = encode_segment(tsk->thread.gs);
+
+	/*
+	 * Copy the whole saved frame starting at cpt_ebx, then replace the
+	 * raw selectors in it with their encoded values.
+	 */
+	memcpy(&img.cpt_ebx, regs, sizeof(struct pt_regs));
+	img.cpt_xcs = encode_segment(regs->xcs);
+	img.cpt_xss = encode_segment(regs->xss);
+	img.cpt_xds = encode_segment(regs->xds);
+	img.cpt_xes = encode_segment(regs->xes);
+
+	ctx->write(&img, sizeof(img), ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+#endif
+#endif
+
+#ifdef CONFIG_IA64
+
+/*
+   PMD?
+ */
+/* _C(x): store x's result in err; if negative, log it with the task id and return -EINVAL from the caller. */
+#define _C(x) do { if ((err = (x)) < 0) { printk("atm:" CPT_FID #x " %d\n", \
+						 CPT_TID(tsk), err); return -EINVAL; } } while (0) 
+/* Fill *r with what the unwinder provides: scratch NaT bits, r4-r7, b1-b5, ar.ec, ar.lc, f2-f5 and f16-f31. */
+static int ass_to_mouth(struct cpt_ia64_regs *r, struct task_struct *tsk,
+			struct cpt_context *ctx)
+{
+	int err;
+	struct unw_frame_info info;
+	struct ia64_fpreg fpval;
+	int i;
+
+	unw_init_from_blocked_task(&info, tsk);
+	_C(unw_unwind_to_user(&info));
+
+	/* NAT_BITS */
+	do {
+		unsigned long scratch_unat;
+
+		scratch_unat = info.sw->caller_unat;
+		if (info.pri_unat_loc)
+			scratch_unat = *info.pri_unat_loc;
+
+		r->nat[0] = ia64_get_scratch_nat_bits(task_pt_regs(tsk), scratch_unat);
+		/* Just to be on safe side. */
+		r->nat[0] &= 0xFFFFFFFFUL;
+	} while (0);
+
+	/* R4-R7 */
+	for (i = 4; i <= 7; i++) {
+		char nat = 0;
+		_C(unw_access_gr(&info, i, &r->gr[i], &nat, 0));
+		r->nat[0] |= (nat != 0) << i;	/* NaT flag of r<i> goes into bit i */
+	}
+
+	/* B1-B5 */
+	for (i = 1; i <= 5; i++) {
+		_C(unw_access_br(&info, i, &r->br[i], 0));
+	}
+
+	/* AR_EC, AR_LC */
+	_C(unw_access_ar(&info, UNW_AR_EC, &r->ar_ec, 0));
+	_C(unw_access_ar(&info, UNW_AR_LC, &r->ar_lc, 0));
+
+	/* F2..F5, F16..F31 */
+	for (i = 2; i <= 5; i++) {
+		_C(unw_get_fr(&info, i, &fpval));
+		memcpy(&r->fr[i*2], &fpval, 16);
+	}
+	for (i = 16; i <= 31; i++) {
+		_C(unw_get_fr(&info, i, &fpval));
+		memcpy(&r->fr[i*2], &fpval, 16);
+	}
+	return 0;
+}
+
+#undef _C
+/* Dump the ia64 register state of tsk as one CPT_OBJ_IA64_REGS object; returns 0 or a negative errno. */
+static int dump_registers(struct task_struct *tsk, struct cpt_context *ctx)
+{
+	int err;
+	unsigned long pg;
+	struct cpt_ia64_regs *r;
+	struct ia64_psr *psr;
+	struct switch_stack *sw;
+	struct pt_regs *pt;
+	void *krbs = (void *)tsk + IA64_RBS_OFFSET;
+	unsigned long reg;
+
+	if (tsk->exit_state)	/* nothing is written for a task that has exited */
+		return 0;
+
+	pt = task_pt_regs(tsk);
+	sw = (struct switch_stack *) (tsk->thread.ksp + 16);
+
+	if ((pg = __get_free_page(GFP_KERNEL)) == 0)
+		return -ENOMEM;
+
+	r = (void*)pg;
+	/* To catch if we forgot some register */
+	memset(r, 0xA5, sizeof(*r));
+
+	r->gr[0] = 0;
+	r->fr[0] = r->fr[1] = 0;
+	r->fr[2] = 0x8000000000000000UL;
+	r->fr[3] = 0xffff;
+
+	r->nat[0] = r->nat[1] = 0;
+
+	err = ass_to_mouth(r, tsk, ctx);
+	if (err) {
+		printk("ass_to_mouth error %d\n", err);
+		goto out;
+	}
+
+	/* gr 1,2-3,8-11,12-13,14,15,16-31 are on pt_regs */
+	memcpy(&r->gr[1], &pt->r1, 8*(2-1));
+	memcpy(&r->gr[2], &pt->r2, 8*(4-2));
+	memcpy(&r->gr[8], &pt->r8, 8*(12-8));
+	memcpy(&r->gr[12], &pt->r12, 8*(14-12));
+	memcpy(&r->gr[14], &pt->r14, 8*(15-14));
+	memcpy(&r->gr[15], &pt->r15, 8*(16-15));
+	memcpy(&r->gr[16], &pt->r16, 8*(32-16));
+
+	r->br[0] = pt->b0;
+	r->br[6] = pt->b6;
+	r->br[7] = pt->b7;
+
+	r->ar_bspstore = pt->ar_bspstore;
+	r->ar_unat = pt->ar_unat;
+	r->ar_pfs = pt->ar_pfs;
+	r->ar_ccv = pt->ar_ccv;
+	r->ar_fpsr = pt->ar_fpsr;
+	r->ar_csd = pt->ar_csd;
+	r->ar_ssd = pt->ar_ssd;
+	r->ar_rsc = pt->ar_rsc;
+
+	r->cr_iip = pt->cr_iip;
+	r->cr_ipsr = pt->cr_ipsr;
+
+	r->pr = pt->pr;
+
+	r->cfm = pt->cr_ifs;
+	r->ar_rnat = pt->ar_rnat;
+
+	/* fpregs 6..9,10..11 are in pt_regs */
+	memcpy(&r->fr[2*6], &pt->f6, 16*(10-6));
+	memcpy(&r->fr[2*10], &pt->f10, 16*(12-10));
+	/* fpreg 12..15 are on switch stack */
+	memcpy(&r->fr[2*12], &sw->f12, 16*(16-12));
+	/* fpregs 32...127 */
+	psr = ia64_psr(task_pt_regs(tsk));
+	preempt_disable();
+	if (ia64_is_local_fpu_owner(tsk) && psr->mfh) {
+		psr->mfh = 0;
+		tsk->thread.flags |= IA64_THREAD_FPH_VALID;
+		ia64_save_fpu(&tsk->thread.fph[0]);
+	}
+	preempt_enable();
+	memcpy(&r->fr[32*2], tsk->thread.fph, 16*(128-32));
+
+	if (tsk->thread.flags & IA64_THREAD_DBG_VALID) {
+		memcpy(r->ibr, tsk->thread.ibr, sizeof(r->ibr));
+		memcpy(r->dbr, tsk->thread.dbr, sizeof(r->dbr));
+	} else {
+		memset(r->ibr, 0, sizeof(r->ibr));
+		memset(r->dbr, 0, sizeof(r->dbr));
+	}
+
+	r->loadrs = pt->loadrs;
+	r->num_regs = ia64_rse_num_regs(krbs, krbs + 8*(pt->loadrs >> 19));
+	if ((long)pt->cr_ifs > 0)
+		r->num_regs += (pt->cr_ifs & 0x7f);
+	/* More than 96 stacked registers cannot be stored; leave through "out" so the page is freed. */
+	if (r->num_regs > 96) {
+		eprintk_ctx(CPT_FID " too much RSE regs %lu\n",
+			    CPT_TID(tsk), r->num_regs);
+		err = -EINVAL;
+		goto out;
+	}
+
+	for (reg = 0; reg < r->num_regs; reg++) {
+		unsigned long *ptr = ia64_rse_skip_regs(krbs, reg);
+		unsigned long *rnatp = ia64_rse_rnat_addr(ptr);
+
+		r->gr[32+reg] = *ptr;
+
+		if ((unsigned long)rnatp >= sw->ar_bspstore)
+			rnatp = &sw->ar_rnat;
+		if (*rnatp & (1UL<<ia64_rse_slot_num(ptr))) {
+			if (reg < 32)
+				r->nat[0] |= (1UL<<(reg+32));
+			else
+				r->nat[1] |= (1UL<<(reg-32));
+		}
+	}
+	if (r->nat[0] | r->nat[1])
+		wprintk_ctx(CPT_FID " nat bits %lx%016lx\n", CPT_TID(tsk),
+			    r->nat[1], r->nat[0]);
+
+	cpt_open_object(NULL, ctx);
+	r->cpt_next = sizeof(*r);
+	r->cpt_object = CPT_OBJ_IA64_REGS;
+	r->cpt_hdrlen = sizeof(*r);
+	r->cpt_content = CPT_CONTENT_VOID;
+	ctx->write(r, sizeof(*r), ctx);
+	cpt_close_object(ctx);
+	err = 0;
+
+out:
+	free_page(pg);
+	return err;
+}
+#endif
+
+static int dump_kstack(task_t *tsk, struct cpt_context *ctx)
+{
+	void *base;
+	unsigned long len;
+	struct cpt_obj_bits bits;
+
+	cpt_open_object(NULL, ctx);
+	/* Span: from the saved kernel sp up to sp0 (x86) or the end of pt_regs (ia64). */
+#ifdef CONFIG_X86_64
+	base = (void*)tsk->thread.rsp;
+	len = tsk->thread.rsp0 - tsk->thread.rsp;
+#elif defined(CONFIG_X86_32)
+	base = (void*)tsk->thread.esp;
+	len = tsk->thread.esp0 - tsk->thread.esp;
+#elif defined(CONFIG_IA64)
+	base = (void*)tsk->thread.ksp;
+	len = (unsigned long)(task_pt_regs(tsk)+1) - tsk->thread.ksp;
+#else
+#error Arch is not supported
+#endif
+
+	bits.cpt_object = CPT_OBJ_BITS;
+	bits.cpt_content = CPT_CONTENT_STACK;
+	bits.cpt_hdrlen = sizeof(bits);
+	bits.cpt_size = len;
+	bits.cpt_next = sizeof(bits) + CPT_ALIGN(len);
+
+	ctx->write(&bits, sizeof(bits), ctx);
+	ctx->write(base, len, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+
+#ifdef CONFIG_X86
+/* Formats of i387_fxsave_struct are the same for x86_64
+ * and i386. Plain luck. */
+
+static int dump_fpustate(task_t *tsk, struct cpt_context *ctx)
+{
+	int kind = CPT_CONTENT_X86_FPUSTATE;
+	unsigned long len = sizeof(struct i387_fxsave_struct);
+	struct cpt_obj_bits bits;
+
+	cpt_open_object(NULL, ctx);
+
+#ifndef CONFIG_X86_64
+	/* Without FXSR the state is saved in the fsave layout and tagged _OLD. */
+	if (!cpu_has_fxsr) {
+		len = sizeof(struct i387_fsave_struct);
+		kind = CPT_CONTENT_X86_FPUSTATE_OLD;
+	}
+#endif
+
+	bits.cpt_object = CPT_OBJ_BITS;
+	bits.cpt_content = kind;
+	bits.cpt_hdrlen = sizeof(bits);
+	bits.cpt_size = len;
+	bits.cpt_next = sizeof(bits) + CPT_ALIGN(len);
+
+	/* Header first, then len raw bytes of tsk->thread.i387, then align. */
+	ctx->write(&bits, sizeof(bits), ctx);
+	ctx->write(&tsk->thread.i387, len, ctx);
+	ctx->align(ctx);
+	cpt_close_object(ctx);
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_IA64
+
+static int dump_fpustate(struct task_struct *tsk, struct cpt_context *ctx)
+{
+	return 0;	/* writes nothing: the ia64 dump_registers() already stores the fr[] array */
+}
+#endif
+/* Fill *si from *info. Callers pass uninitialized stack images, so per-class fields are zeroed first. Returns 0. */
+static int encode_siginfo(struct cpt_siginfo_image *si, siginfo_t *info)
+{
+	si->cpt_signo = info->si_signo;
+	si->cpt_errno = info->si_errno;
+	si->cpt_code = info->si_code;
+	si->cpt_pid = si->cpt_uid = si->cpt_sigval = si->cpt_utime = si->cpt_stime = 0;
+	switch(si->cpt_code & __SI_MASK) {
+	case __SI_TIMER:
+		si->cpt_pid = info->si_tid;
+		si->cpt_uid = info->si_overrun;
+		si->cpt_sigval = cpt_ptr_export(info->_sifields._timer._sigval.sival_ptr);
+		si->cpt_utime = info->si_sys_private;
+		break;
+	case __SI_POLL:
+		si->cpt_pid = info->si_band;
+		si->cpt_uid = info->si_fd;
+		break;
+	case __SI_FAULT:
+		si->cpt_sigval = cpt_ptr_export(info->si_addr);
+#ifdef __ARCH_SI_TRAPNO
+		si->cpt_pid = info->si_trapno;
+#endif
+		break;
+	case __SI_CHLD:
+		si->cpt_pid = is_virtual_pid(info->si_pid) ? info->si_pid : pid_type_to_vpid(PIDTYPE_PID, info->si_pid);
+		si->cpt_uid = info->si_uid;
+		si->cpt_sigval = info->si_status;
+		si->cpt_stime = info->si_stime;
+		si->cpt_utime = info->si_utime;
+		break;
+	case __SI_KILL:
+	case __SI_RT:
+	case __SI_MESGQ:
+	default:
+		si->cpt_pid = is_virtual_pid(info->si_pid) ? info->si_pid : pid_type_to_vpid(PIDTYPE_TGID, info->si_pid);
+		si->cpt_uid = info->si_uid;
+		si->cpt_sigval = cpt_ptr_export(info->si_ptr);
+		break;
+	}
+	return 0;
+}
+
+static int dump_sigqueue(struct sigpending *list, struct cpt_context *ctx)
+{
+	struct sigqueue *entry;
+	loff_t saved_obj;
+
+	/* An empty queue produces no output at all. */
+	if (list_empty(&list->list))
+		return 0;
+
+	cpt_push_object(&saved_obj, ctx);
+	list_for_each_entry(entry, &list->list, list) {
+		struct cpt_siginfo_image img;
+
+		img.cpt_object = CPT_OBJ_SIGINFO;
+		img.cpt_content = CPT_CONTENT_VOID;
+		img.cpt_hdrlen = sizeof(img);
+		img.cpt_next = sizeof(img);
+		img.cpt_user = entry->user->uid;
+		img.cpt_qflags = entry->flags;
+
+		/* One CPT_OBJ_SIGINFO record is written per queued signal. */
+		if (encode_siginfo(&img, &entry->info) != 0)
+			return -EINVAL;
+		ctx->write(&img, sizeof(img), ctx);
+	}
+	cpt_pop_object(&saved_obj, ctx);
+	return 0;
+}
+
+
+/* Write one CPT_OBJ_SIGNAL_STRUCT image for the signal_struct in obj->o_obj, then its shared pending queue. */
+static int dump_one_signal_struct(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct signal_struct *sig = obj->o_obj;
+	struct cpt_signal_image *v = cpt_get_buf(ctx);
+	task_t *tsk;
+	int i;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SIGNAL_STRUCT;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	if (sig->pgrp <= 0) {
+		eprintk_ctx("bad pgid\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_pgrp_type = CPT_PGRP_NORMAL;
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->pgrp);
+	if (tsk == NULL)
+		v->cpt_pgrp_type = CPT_PGRP_ORPHAN;	/* the lookup by pid found no task */
+	read_unlock(&tasklist_lock);
+	v->cpt_pgrp = pid_type_to_vpid(PIDTYPE_PGID, sig->pgrp);
+
+	v->cpt_old_pgrp = 0;	/* NOTE(review): cpt_old_pgrp_type is only assigned when tty_old_pgrp > 0 */
+	if (sig->tty_old_pgrp < 0) {
+		eprintk_ctx("bad tty_old_pgrp\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	if (sig->tty_old_pgrp > 0) {
+		v->cpt_old_pgrp_type = CPT_PGRP_NORMAL;
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->tty_old_pgrp);
+		if (tsk == NULL) {
+			v->cpt_old_pgrp_type = CPT_PGRP_ORPHAN;
+			tsk = find_task_by_pid_type_ve(PIDTYPE_PGID, sig->tty_old_pgrp);
+		}
+		read_unlock(&tasklist_lock);
+		if (tsk == NULL) {
+			eprintk_ctx("tty_old_pgrp does not exist anymore\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_old_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, sig->tty_old_pgrp);
+		if ((int)v->cpt_old_pgrp < 0) {
+			dprintk_ctx("stray tty_old_pgrp %d\n", sig->tty_old_pgrp);
+			v->cpt_old_pgrp = -1;
+			v->cpt_old_pgrp_type = CPT_PGRP_STRAY;
+		}
+	}
+
+	if (sig->session <= 0) {
+		eprintk_ctx("bad session\n");
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+	v->cpt_session_type = CPT_PGRP_NORMAL;
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->session);
+	if (tsk == NULL)
+		v->cpt_session_type = CPT_PGRP_ORPHAN;
+	read_unlock(&tasklist_lock);
+	v->cpt_session = pid_type_to_vpid(PIDTYPE_SID, sig->session);
+
+	v->cpt_leader = sig->leader;
+	v->cpt_ctty = CPT_NULL;
+	if (sig->tty) {
+		cpt_object_t *cobj = lookup_cpt_object(CPT_OBJ_TTY, sig->tty, ctx);
+		if (cobj)
+			v->cpt_ctty = cobj->o_pos;	/* the tty is referenced by its object position */
+		else {
+			eprintk_ctx("controlling tty is not found\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	memcpy(&v->cpt_sigpending, &sig->shared_pending.signal, 8);	/* exactly 8 bytes of the set are copied */
+
+	v->cpt_curr_target = 0;
+	if (sig->curr_target)
+		v->cpt_curr_target = virt_pid(sig->curr_target);
+	v->cpt_group_exit = ((sig->flags & SIGNAL_GROUP_EXIT) != 0);
+	v->cpt_group_exit_code = sig->group_exit_code;
+	v->cpt_group_exit_task = 0;
+	if (sig->group_exit_task)
+		v->cpt_group_exit_task = virt_pid(sig->group_exit_task);
+	v->cpt_notify_count = sig->notify_count;
+	v->cpt_group_stop_count = sig->group_stop_count;
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,8)
+	v->cpt_utime = sig->utime;
+	v->cpt_stime = sig->stime;
+	v->cpt_cutime = sig->cutime;
+	v->cpt_cstime = sig->cstime;
+	v->cpt_nvcsw = sig->nvcsw;
+	v->cpt_nivcsw = sig->nivcsw;
+	v->cpt_cnvcsw = sig->cnvcsw;
+	v->cpt_cnivcsw = sig->cnivcsw;
+	v->cpt_min_flt = sig->min_flt;
+	v->cpt_maj_flt = sig->maj_flt;
+	v->cpt_cmin_flt = sig->cmin_flt;
+	v->cpt_cmaj_flt = sig->cmaj_flt;
+
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n");	/* presumably meant to break the build if the image has too few rlimit slots */
+
+	for (i=0; i<CPT_RLIM_NLIMITS; i++) {
+		if (i < RLIM_NLIMITS) {
+			v->cpt_rlim_cur[i] = sig->rlim[i].rlim_cur;
+			v->cpt_rlim_max[i] = sig->rlim[i].rlim_max;
+		} else {
+			v->cpt_rlim_cur[i] = CPT_NULL;
+			v->cpt_rlim_max[i] = CPT_NULL;
+		}
+	}
+#endif
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	dump_sigqueue(&sig->shared_pending, ctx);
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+/* Return -EBUSY (after logging) if tsk uses a feature rejected below: keys, NUMA mempolicy or TUX; 0 otherwise. */
+int cpt_check_unsupported(struct task_struct *tsk, cpt_context_t *ctx)
+{
+#if 0
+	if (tsk->splice_pipe) {
+		eprintk_ctx("splice is used by " CPT_FID "\n", CPT_TID(tsk));
+		return -EBUSY;
+	}
+#endif
+#ifdef CONFIG_KEYS
+	if (tsk->request_key_auth || tsk->thread_keyring) {
+		eprintk_ctx("keys are used by " CPT_FID "\n", CPT_TID(tsk));
+		return -EBUSY;
+	}
+#endif
+#ifdef CONFIG_NUMA
+	if (tsk->mempolicy) {
+		eprintk_ctx("NUMA mempolicy is used by " CPT_FID "\n", CPT_TID(tsk));
+		return -EBUSY;
+	}
+#endif
+#ifdef CONFIG_TUX
+	if (tsk->tux_info) {
+		eprintk_ctx("TUX is used by " CPT_FID "\n", CPT_TID(tsk));
+		return -EBUSY;
+	}
+#endif
+	return 0;
+}
+/* Write the CPT_OBJ_TASK image of obj->o_obj plus its nested objects (stack, registers, FPU, siginfo, sigaltstack, pending signals). */
+static int dump_one_process(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	task_t *tsk = obj->o_obj;
+	int last_thread;
+	struct cpt_task_image *v = cpt_get_buf(ctx);
+	cpt_object_t *tobj;
+	cpt_object_t *tg_obj;
+	loff_t saved_obj;
+	int i;
+	int err;
+	struct timespec delta;
+	struct mm_struct * tsk_mm;
+	struct files_struct * tsk_files;
+	struct fs_struct * tsk_fs;
+	struct namespace * tsk_ns;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_signal = CPT_NULL;
+	tg_obj = lookup_cpt_object(CPT_OBJ_SIGNAL_STRUCT, tsk->signal, ctx);
+	if (!tg_obj) BUG();
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_TASK;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_state = tsk->state;
+	if (tsk->state == EXIT_ZOMBIE) {
+		eprintk_ctx("invalid zombie state on" CPT_FID "\n", CPT_TID(tsk));
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	} else if (tsk->state == EXIT_DEAD) {
+		if (tsk->exit_state != EXIT_DEAD &&
+		    tsk->exit_state != EXIT_ZOMBIE) {
+			eprintk_ctx("invalid exit_state %ld on" CPT_FID "\n", tsk->exit_state, CPT_TID(tsk));
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	if (tsk->exit_state) {
+		v->cpt_state = tsk->exit_state;
+		if (tsk->state != EXIT_DEAD) {
+			eprintk_ctx("invalid tsk->state %ld/%ld on" CPT_FID "\n",
+				tsk->state, tsk->exit_state, CPT_TID(tsk));
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+	}
+	if (cpt_check_unsupported(tsk, ctx)) {
+		cpt_release_buf(ctx);
+		return -EBUSY;
+	}
+
+	v->cpt_flags = tsk->flags&~(PF_FROZEN|PF_EXIT_RESTART);
+	v->cpt_ptrace = tsk->ptrace;
+	v->cpt_prio = tsk->prio;
+	v->cpt_exit_code = tsk->exit_code;
+	v->cpt_exit_signal = tsk->exit_signal;
+	v->cpt_pdeath_signal = tsk->pdeath_signal;
+	v->cpt_static_prio = tsk->static_prio;
+	v->cpt_rt_priority = tsk->rt_priority;
+	v->cpt_policy = tsk->policy;
+	if (v->cpt_policy != SCHED_NORMAL) {
+		eprintk_ctx("scheduler policy is not supported %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+
+	/* Unpleasant moment. When leader of thread group exits,
+	 * it remains in zombie state until all the group exits.
+	 * We save not-NULL pointers to process mm/files/fs, so
+	 * that we can restore this thread group.
+	 */
+	tsk_mm = tsk->mm;
+	tsk_files = tsk->files;
+	tsk_fs = tsk->fs;
+	tsk_ns = tsk->namespace;
+
+	if (tsk->exit_state && !thread_group_empty(tsk) &&
+	    thread_group_leader(tsk)) {
+		struct task_struct * p = tsk;
+
+		read_lock(&tasklist_lock);
+		do {
+			if (p->mm)
+				tsk_mm = p->mm;
+			if (p->files)
+				tsk_files = p->files;
+			if (p->fs)
+				tsk_fs = p->fs;
+			if (p->namespace)
+				tsk_ns = p->namespace;
+			p = next_thread(p);
+		} while (p != tsk);
+		read_unlock(&tasklist_lock);
+	}
+
+	v->cpt_mm = CPT_NULL;
+	if (tsk_mm) {
+		tobj = lookup_cpt_object(CPT_OBJ_MM, tsk_mm, ctx);
+		if (!tobj) BUG();
+		v->cpt_mm = tobj->o_pos;
+	}
+	v->cpt_files = CPT_NULL;
+	if (tsk_files) {
+		tobj = lookup_cpt_object(CPT_OBJ_FILES, tsk_files, ctx);
+		if (!tobj) BUG();
+		v->cpt_files = tobj->o_pos;
+	}
+	v->cpt_fs = CPT_NULL;
+	if (tsk_fs) {
+		tobj = lookup_cpt_object(CPT_OBJ_FS, tsk_fs, ctx);
+		if (!tobj) BUG();
+		v->cpt_fs = tobj->o_pos;
+	}
+	v->cpt_namespace = CPT_NULL;
+	if (tsk_ns) {
+		tobj = lookup_cpt_object(CPT_OBJ_NAMESPACE, tsk_ns, ctx);
+		if (!tobj) BUG();
+		v->cpt_namespace = tobj->o_pos;
+
+		if (tsk_ns != current->namespace)
+			eprintk_ctx("namespaces are not supported: process " CPT_FID "\n", CPT_TID(tsk));
+	}
+	v->cpt_sysvsem_undo = CPT_NULL;
+	if (tsk->sysvsem.undo_list && !tsk->exit_state) {
+		tobj = lookup_cpt_object(CPT_OBJ_SYSVSEM_UNDO, tsk->sysvsem.undo_list, ctx);
+		if (!tobj) BUG();
+		v->cpt_sysvsem_undo = tobj->o_pos;
+	}
+	v->cpt_sighand = CPT_NULL;
+	if (tsk->sighand) {
+		tobj = lookup_cpt_object(CPT_OBJ_SIGHAND_STRUCT, tsk->sighand, ctx);
+		if (!tobj) BUG();
+		v->cpt_sighand = tobj->o_pos;
+	}
+	v->cpt_sigblocked = cpt_sigset_export(&tsk->blocked);
+	v->cpt_sigrblocked = cpt_sigset_export(&tsk->real_blocked);
+	v->cpt_sigsuspend_blocked = cpt_sigset_export(&tsk->saved_sigmask);
+
+	v->cpt_pid = virt_pid(tsk);
+	v->cpt_tgid = virt_tgid(tsk);
+	v->cpt_ppid = 0;
+	if (tsk->parent) {
+		if (tsk->parent != tsk->real_parent &&
+		    !lookup_cpt_object(CPT_OBJ_TASK, tsk->parent, ctx)) {
+			eprintk_ctx("task %d/%d(%s) is ptraced from ve0\n", tsk->pid, virt_pid(tsk), tsk->comm);
+			cpt_release_buf(ctx);
+			return -EBUSY;
+		}
+		v->cpt_ppid = virt_pid(tsk->parent);
+	}
+	v->cpt_rppid = tsk->real_parent ? virt_pid(tsk->real_parent) : 0;
+	v->cpt_pgrp = virt_pgid(tsk);
+	v->cpt_session = virt_sid(tsk);
+	v->cpt_old_pgrp = 0;
+	if (tsk->signal->tty_old_pgrp)
+		v->cpt_old_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, tsk->signal->tty_old_pgrp);
+	v->cpt_leader = tsk->group_leader ? virt_pid(tsk->group_leader) : 0;
+	v->cpt_set_tid = (unsigned long)tsk->set_child_tid;
+	v->cpt_clear_tid = (unsigned long)tsk->clear_child_tid;
+	memcpy(v->cpt_comm, tsk->comm, 16);
+	v->cpt_user = tsk->user->uid;
+	v->cpt_uid = tsk->uid;
+	v->cpt_euid = tsk->euid;
+	v->cpt_suid = tsk->suid;
+	v->cpt_fsuid = tsk->fsuid;
+	v->cpt_gid = tsk->gid;
+	v->cpt_egid = tsk->egid;
+	v->cpt_sgid = tsk->sgid;
+	v->cpt_fsgid = tsk->fsgid;
+	v->cpt_ngids = 0;
+	if (tsk->group_info && tsk->group_info->ngroups != 0) {
+		i = tsk->group_info->ngroups;
+		if (i > 32) {
+			/* Only the inline small_block[] is copied below, so
+			 * larger group sets cannot be saved yet. */
+			eprintk_ctx("too many of groups " CPT_FID "\n", CPT_TID(tsk));
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_ngids = i;
+		for (i--; i>=0; i--)
+			v->cpt_gids[i] = tsk->group_info->small_block[i];
+	}
+	v->cpt_prctl_uac = 0;
+	v->cpt_prctl_fpemu = 0;
+	v->__cpt_pad1 = 0;
+#ifdef CONFIG_IA64
+	v->cpt_prctl_uac = (tsk->thread.flags & IA64_THREAD_UAC_MASK) >> IA64_THREAD_UAC_SHIFT;
+	v->cpt_prctl_fpemu = (tsk->thread.flags & IA64_THREAD_FPEMU_MASK) >> IA64_THREAD_FPEMU_SHIFT;
+#endif
+	memcpy(&v->cpt_ecap, &tsk->cap_effective, 8);
+	memcpy(&v->cpt_icap, &tsk->cap_inheritable, 8);
+	memcpy(&v->cpt_pcap, &tsk->cap_permitted, 8);
+	v->cpt_keepcap = tsk->keep_capabilities;
+
+	v->cpt_did_exec = tsk->did_exec;
+	v->cpt_exec_domain = -1;
+	v->cpt_thrflags = tsk->thread_info->flags & ~(1<<TIF_FREEZE);
+	v->cpt_64bit = 0;
+#ifdef CONFIG_X86_64
+	/* Clear x86_64 specific flags */
+	v->cpt_thrflags &= ~(_TIF_FORK|_TIF_ABI_PENDING|_TIF_IA32);
+	if (!(tsk->thread_info->flags & _TIF_IA32)) {
+		ctx->tasks64++;
+		v->cpt_64bit = 1;
+	}
+#endif
+#ifdef CONFIG_IA64
+	/* Clear ia64 specific flags */
+	//// v->cpt_thrflags &= ~(_TIF_FORK|_TIF_ABI_PENDING|_TIF_IA32);
+	if (!IS_IA32_PROCESS(task_pt_regs(tsk))) {
+		ctx->tasks64++;
+		v->cpt_64bit = 1;
+	}
+#endif
+#ifndef CONFIG_IA64
+	v->cpt_thrstatus = tsk->thread_info->status;
+#endif
+	v->cpt_addr_limit = -1;
+
+	v->cpt_personality = tsk->personality;
+
+#ifdef CONFIG_X86
+	for (i=0; i<GDT_ENTRY_TLS_ENTRIES; i++) {
+		if (i>=3) {
+			eprintk_ctx("too many tls descs\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+#ifndef CONFIG_X86_64
+		v->cpt_tls[i] = (((u64)tsk->thread.tls_array[i].b)<<32) + tsk->thread.tls_array[i].a;
+#else
+		v->cpt_tls[i] = tsk->thread.tls_array[i];
+#endif
+	}
+#endif
+
+	v->cpt_restart.fn = CPT_RBL_0;
+	if (tsk->thread_info->restart_block.fn != current->thread_info->restart_block.fn) {
+		ktime_t e;
+
+		if (tsk->thread_info->restart_block.fn != nanosleep_restart
+#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
+		    && tsk->thread_info->restart_block.fn != compat_nanosleep_restart
+#endif
+		    ) {
+			eprintk_ctx("unknown restart block %p\n", tsk->thread_info->restart_block.fn);
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		v->cpt_restart.fn = CPT_RBL_NANOSLEEP;
+#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
+		if (tsk->thread_info->restart_block.fn == compat_nanosleep_restart)
+			v->cpt_restart.fn = CPT_RBL_COMPAT_NANOSLEEP;
+#endif
+
+		e.tv64 = ((u64)tsk->thread_info->restart_block.arg1 << 32) |
+			(u64) tsk->thread_info->restart_block.arg0;
+		e = ktime_sub(e, timespec_to_ktime(ctx->cpt_monotonic_time));
+		v->cpt_restart.arg0 = ktime_to_ns(e);
+		v->cpt_restart.arg1 = 0;
+		v->cpt_restart.arg2 = tsk->thread_info->restart_block.arg2;
+		v->cpt_restart.arg3 = tsk->thread_info->restart_block.arg3;
+		dprintk_ctx(CPT_FID " %Lu\n", CPT_TID(tsk), (unsigned long long)v->cpt_restart.arg0);
+	}
+
+	v->cpt_it_real_incr = 0;
+	v->cpt_it_prof_incr = 0;
+	v->cpt_it_virt_incr = 0;
+	v->cpt_it_real_value = 0;
+	v->cpt_it_prof_value = 0;
+	v->cpt_it_virt_value = 0;
+	if (thread_group_leader(tsk) && tsk->exit_state == 0) {
+		ktime_t rem;
+
+		v->cpt_it_real_incr = ktime_to_ns(tsk->signal->it_real_incr);
+		v->cpt_it_prof_incr = tsk->signal->it_prof_incr;
+		v->cpt_it_virt_incr = tsk->signal->it_virt_incr;
+
+		rem = hrtimer_get_remaining(&tsk->signal->real_timer);
+
+		if (hrtimer_active(&tsk->signal->real_timer)) {
+			if (rem.tv64 <= 0)
+				rem.tv64 = NSEC_PER_USEC;
+			v->cpt_it_real_value = ktime_to_ns(rem);
+			dprintk("cpt itimer " CPT_FID " %Lu\n", CPT_TID(tsk), (unsigned long long)v->cpt_it_real_value);
+		}
+		v->cpt_it_prof_value = tsk->signal->it_prof_expires;
+		v->cpt_it_virt_value = tsk->signal->it_virt_expires;
+	}
+	v->cpt_used_math = (tsk_used_math(tsk) != 0);
+
+	if (tsk->notifier) {
+		eprintk_ctx("task notifier is in use: process %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
+		cpt_release_buf(ctx);
+		return -EINVAL;
+	}
+
+	v->cpt_utime = tsk->utime;
+	v->cpt_stime = tsk->stime;
+	delta = tsk->start_time;
+	_set_normalized_timespec(&delta,
+			delta.tv_sec - get_exec_env()->start_timespec.tv_sec,
+			delta.tv_nsec - get_exec_env()->start_timespec.tv_nsec);
+	v->cpt_starttime = cpt_timespec_export(&delta);
+	v->cpt_nvcsw = tsk->nvcsw;
+	v->cpt_nivcsw = tsk->nivcsw;
+	v->cpt_min_flt = tsk->min_flt;
+	v->cpt_maj_flt = tsk->maj_flt;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8)
+	v->cpt_cutime = tsk->cutime;
+	v->cpt_cstime = tsk->cstime;
+	v->cpt_cnvcsw = tsk->cnvcsw;
+	v->cpt_cnivcsw = tsk->cnivcsw;
+	v->cpt_cmin_flt = tsk->cmin_flt;
+	v->cpt_cmaj_flt = tsk->cmaj_flt;
+
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n");
+
+	for (i=0; i<CPT_RLIM_NLIMITS; i++) {
+		if (i < RLIM_NLIMITS) {
+			v->cpt_rlim_cur[i] = tsk->rlim[i].rlim_cur;
+			v->cpt_rlim_max[i] = tsk->rlim[i].rlim_max;
+		} else {
+			v->cpt_rlim_cur[i] = CPT_NULL;
+			v->cpt_rlim_max[i] = CPT_NULL;
+		}
+	}
+#else
+	v->cpt_cutime = tsk->signal->cutime;
+	v->cpt_cstime = tsk->signal->cstime;
+	v->cpt_cnvcsw = tsk->signal->cnvcsw;
+	v->cpt_cnivcsw = tsk->signal->cnivcsw;
+	v->cpt_cmin_flt = tsk->signal->cmin_flt;
+	v->cpt_cmaj_flt = tsk->signal->cmaj_flt;
+
+	if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+		__asm__("undefined\n");
+
+	for (i=0; i<CPT_RLIM_NLIMITS; i++) {
+		if (i < RLIM_NLIMITS) {
+			v->cpt_rlim_cur[i] = tsk->signal->rlim[i].rlim_cur;
+			v->cpt_rlim_max[i] = tsk->signal->rlim[i].rlim_max;
+		} else {
+			v->cpt_rlim_cur[i] = CPT_NULL;
+			v->cpt_rlim_max[i] = CPT_NULL;
+		}
+	}
+#endif
+
+#ifdef CONFIG_USER_RESOURCE
+	if (tsk->mm)
+		v->cpt_mm_ub = cpt_lookup_ubc(tsk->mm->mm_ub, ctx);
+	else
+		v->cpt_mm_ub = CPT_NULL;
+	v->cpt_task_ub = cpt_lookup_ubc(tsk->task_bc.task_ub, ctx);
+	v->cpt_exec_ub = cpt_lookup_ubc(tsk->task_bc.exec_ub, ctx);
+	v->cpt_fork_sub = cpt_lookup_ubc(tsk->task_bc.fork_sub, ctx);
+#endif
+
+	v->cpt_ptrace_message = tsk->ptrace_message;
+	v->cpt_pn_state = tsk->pn_state;
+	v->cpt_stopped_state = tsk->stopped_state;
+	v->cpt_sigsuspend_state = 0;
+
+#ifdef CONFIG_X86_32
+	if (tsk->thread.vm86_info) {
+		eprintk_ctx("vm86 task is running\n");
+		cpt_release_buf(ctx);
+		return -EBUSY;
+	}
+#endif
+
+	v->cpt_sigpending = cpt_sigset_export(&tsk->pending.signal);
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	dump_kstack(tsk, ctx);
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_push_object(&saved_obj, ctx);
+	err = dump_registers(tsk, ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	if (err)
+		return err;
+
+	if (tsk_used_math(tsk)) {
+		cpt_push_object(&saved_obj, ctx);
+		dump_fpustate(tsk, ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	if (tsk->last_siginfo) {
+		struct cpt_siginfo_image si;
+		cpt_push_object(&saved_obj, ctx);
+		memset(&si, 0, sizeof(si));	/* cpt_qflags/cpt_user are not set on this path */
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_LASTSIGINFO;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+
+		if (encode_siginfo(&si, tsk->last_siginfo))
+			return -EINVAL;
+
+		ctx->write(&si, sizeof(si), ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	if (tsk->sas_ss_size) {
+		struct cpt_sigaltstack_image si;
+		cpt_push_object(&saved_obj, ctx);
+
+		si.cpt_next = sizeof(si);
+		si.cpt_object = CPT_OBJ_SIGALTSTACK;
+		si.cpt_hdrlen = sizeof(si);
+		si.cpt_content = CPT_CONTENT_VOID;
+
+		si.cpt_stack = tsk->sas_ss_sp;
+		si.cpt_stacksize = tsk->sas_ss_size;
+
+		ctx->write(&si, sizeof(si), ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+
+	dump_sigqueue(&tsk->pending, ctx);
+
+	last_thread = 1;
+	read_lock(&tasklist_lock);
+	do {
+		task_t * next = next_thread(tsk);
+		if (next != tsk && !thread_group_leader(next))
+			last_thread = 0;
+	} while (0);
+	read_unlock(&tasklist_lock);
+
+	if (last_thread) {
+		task_t *prev_tsk;
+		loff_t pos = ctx->file->f_pos;
+
+		cpt_push_object(&saved_obj, ctx);
+		err = dump_one_signal_struct(tg_obj, ctx);
+		cpt_pop_object(&saved_obj, ctx);
+		if (err)
+			return err;
+
+		prev_tsk = tsk;
+		for (;;) {
+			if (prev_tsk->tgid == tsk->tgid) {
+				loff_t tg_pos;
+
+				tg_pos = obj->o_pos + offsetof(struct cpt_task_image, cpt_signal);
+				ctx->pwrite(&pos, sizeof(pos), ctx, tg_pos);
+				if (thread_group_leader(prev_tsk))
+					break;
+			}
+
+			if (obj->o_list.prev == &ctx->object_array[CPT_OBJ_TASK]) {
+				eprintk_ctx("bug: thread group leader is lost\n");
+				return -EINVAL;
+			}
+
+			obj = list_entry(obj->o_list.prev, cpt_object_t, o_list);
+			prev_tsk = obj->o_obj;
+		}
+	}
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+int cpt_dump_tasks(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err;
+
+	cpt_open_section(ctx, CPT_SECT_TASKS);
+	/* The first task that fails to dump ends the walk with its error. */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		err = dump_one_process(obj, ctx);
+		if (err != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	return 0;
+}
+
+int cpt_collect_signals(cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	/* Add every task's signal_struct and sighand_struct; posix timers are refused. */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		if (tsk->signal != NULL && !list_empty(&tsk->signal->posix_timers)) {
+			eprintk_ctx("task %d/%d(%s) uses posix timers\n", tsk->pid, virt_pid(tsk), tsk->comm);
+			return -EBUSY;
+		}
+		if (tsk->signal != NULL && !cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, tsk->signal, ctx))
+			return -ENOMEM;
+		if (tsk->sighand != NULL && !cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, tsk->sighand, ctx))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+
+static int dump_one_sighand_struct(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct sighand_struct *sighand = obj->o_obj;
+	struct cpt_sighand_image *hdr = cpt_get_buf(ctx);
+	int idx;
+
+	cpt_open_object(obj, ctx);
+	/* Container record first; one nested record per written action follows. */
+	hdr->cpt_object = CPT_OBJ_SIGHAND_STRUCT;
+	hdr->cpt_content = CPT_CONTENT_ARRAY;
+	hdr->cpt_hdrlen = sizeof(*hdr);
+	hdr->cpt_next = CPT_NULL;
+	ctx->write(hdr, sizeof(*hdr), ctx);
+	cpt_release_buf(ctx);
+
+	for (idx = 0; idx < _NSIG; idx++) {
+		struct k_sigaction *ka = &sighand->action[idx];
+		struct cpt_sighandler_image *img;
+		loff_t saved_obj;
+
+		/* Actions still at SIG_DFL with no flags set are not written. */
+		if (ka->sa.sa_handler == SIG_DFL && !ka->sa.sa_flags)
+			continue;
+
+		img = cpt_get_buf(ctx);
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+		img->cpt_object = CPT_OBJ_SIGHANDLER;
+		img->cpt_content = CPT_CONTENT_VOID;
+		img->cpt_hdrlen = sizeof(*img);
+		img->cpt_next = CPT_NULL;
+		img->cpt_signo = idx;
+		img->cpt_flags = ka->sa.sa_flags;
+		img->cpt_handler = (unsigned long)ka->sa.sa_handler;
+#ifdef CONFIG_X86
+		img->cpt_restorer = (unsigned long)ka->sa.sa_restorer;
+#else
+		img->cpt_restorer = 0;
+#endif
+		memcpy(&img->cpt_mask, &ka->sa.sa_mask, 8);
+		ctx->write(img, sizeof(*img), ctx);
+		cpt_release_buf(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	cpt_close_object(ctx);
+	return 0;
+}
+
+int cpt_dump_sighand(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err;
+
+	cpt_open_section(ctx, CPT_SECT_SIGHAND_STRUCT);
+	/* The first sighand that fails to dump ends the walk with its error. */
+	for_each_object(obj, CPT_OBJ_SIGHAND_STRUCT) {
+		err = dump_one_sighand_struct(obj, ctx);
+		if (err != 0)
+			return err;
+	}
+	cpt_close_section(ctx);
+
+	return 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_process.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_process.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_process.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_process.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,10 @@
+int cpt_collect_signals(cpt_context_t *);
+int cpt_dump_signal(struct cpt_context *);
+int cpt_dump_sighand(struct cpt_context *);
+int cpt_dump_tasks(struct cpt_context *);
+
+int rst_signal_complete(struct cpt_task_image *ti, int *exiting, struct cpt_context *ctx);
+__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+
+int rst_restore_process(struct cpt_context *ctx);
+int rst_process_linkage(struct cpt_context *ctx);
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_socket.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_socket.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_socket.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_socket.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,779 @@
+/*
+ *
+ *  kernel/cpt/cpt_socket.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <linux/tcp.h>
+#include <net/sock.h>
+#include <net/scm.h>
+#include <net/af_unix.h>
+#include <net/tcp.h>
+#include <net/netlink_sock.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+
+static int dump_rqueue(int owner, struct sock *sk, struct cpt_context *ctx);
+
+
+/* Sockets are quite different from other kinds of files.
+ * There is one simplification: only one struct file can refer to a socket,
+ * so we could store information about socket directly in section FILES as
+ * a description of a file and append f.e. array of not-yet-accepted
+ * connections of listening socket as array of auxiliary data.
+ *
+ * Complications are:
+ * 1. TCP sockets can be orphans. We have to relocate orphans as well,
+ *    so we have to create special section for orphans.
+ * 2. AF_UNIX sockets are distinguished objects: set of links between
+ *    AF_UNIX sockets is quite arbitrary.
+ *    A. Each socket can refer to many files due to FD passing.
+ *    B. Each socket except for connected ones can have in queue skbs
+ *       sent by any of sockets.
+ *
+ *    2A is relatively easy: after our tasks are frozen we make an additional
+ *    recursive pass through the set of collected files and pick up references
+ *    to FD-passed files. After the recursion ends, all the files are treated
+ *    in the same way. They will all be stored in section FILES.
+ *
+ *    2B. We have to resolve all those references at some point.
+ *    It is the place where pipe-like approach to image fails.
+ *
+ * All this makes socket checkpointing quite cumbersome.
+ * Right now we collect all the sockets and assign some numeric index value
+ * to each of them. The socket section is separate and put after section FILES,
+ * so section FILES refers to sockets by index, section SOCKET refers to FILES
+ * as usual by position in image. All the refs inside socket section are
+ * by index. When restoring we read socket section, create objects to hold
+ * mappings index <-> pos. At the second pass we open sockets (simultaneously
+ * with their pairs) and create FILE objects.
+ */
+
+
+/* ====== FD passing ====== */
+
+/* Almost nobody does FD passing via AF_UNIX sockets, nevertheless we
+ * have to implement this. A problem is that in general case we receive
+ * skbs from an unknown context, so new files can arrive to checkpointed
+ * set of processes even after they are stopped. Well, we are going just
+ * to ignore unknown fds while doing real checkpointing. It is fair because
+ * links outside checkpointed set are going to fail anyway.
+ *
+ * ATTN: the procedure is recursive. We linearize the recursion adding
+ * newly found files to the end of file list, so they will be analyzed
+ * in the same loop.
+ */
+
+/* Add files passed over an AF_UNIX socket and still queued on it to the
+ * file table. Returns 0, -ENOTSOCK or -ENOMEM. */
+static int collect_one_passedfd(struct file *file, cpt_context_t * ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	struct sk_buff_head *rq;
+	struct socket *sock;
+	struct sk_buff *skb;
+	struct sock *sk;
+
+	if (!S_ISSOCK(ino->i_mode))
+		return -ENOTSOCK;
+
+	sock = &container_of(ino, struct socket_alloc, vfs_inode)->socket;
+	if (sock->ops->family != AF_UNIX)
+		return 0;
+
+	sk = sock->sk;
+	rq = &sk->sk_receive_queue;
+
+	/* All our processes are stopped, so skbs are never removed
+	 * while we scan, but new ones may still be appended to the
+	 * tail. Taking the queue lock just around the skb->next
+	 * dereference is enough for that.
+	 */
+	for (skb = skb_peek(rq); skb && skb != (struct sk_buff *)rq; ) {
+		struct scm_fp_list *fpl = UNIXCB(skb).fp;
+		int i;
+
+		if (fpl && skb->sk &&
+		    (!sock_flag(skb->sk, SOCK_DEAD) || unix_peer(sk) == skb->sk)) {
+			for (i = fpl->count - 1; i >= 0; i--) {
+				if (!cpt_object_add(CPT_OBJ_FILE, fpl->fp[i], ctx))
+					return -ENOMEM;
+			}
+		}
+
+		spin_lock_irq(&rq->lock);
+		skb = skb->next;
+		spin_unlock_irq(&rq->lock);
+	}
+
+	return 0;
+}
+
+/* Scan every collected file that is a socket for in-flight passed fds. */
+int cpt_collect_passedfds(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+		int err;
+
+		if (!S_ISSOCK(file->f_dentry->d_inode->i_mode))
+			continue;
+		err = collect_one_passedfd(file, ctx);
+		if (err < 0)
+			return err;
+	}
+	return 0;
+}
+
+/* ====== End of FD passing ====== */
+
+/* Must be called under bh_lock_sock() */
+
+/* Unlink the whole backlog list from sk and free every skb on it. */
+void clear_backlog(struct sock *sk)
+{
+	struct sk_buff *cur = sk->sk_backlog.head;
+	struct sk_buff *next;
+
+	sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
+	for (; cur != NULL; cur = next) {
+		next = cur->next;
+		cur->next = NULL;
+		kfree_skb(cur);
+	}
+}
+
+void release_sock_nobacklog(struct sock *sk)
+{
+	spin_lock_bh(&sk->sk_lock.slock);
+	clear_backlog(sk);	/* queued skbs are freed, not processed */
+	sk->sk_lock.owner = NULL;
+	if (waitqueue_active(&sk->sk_lock.wq))
+		wake_up(&sk->sk_lock.wq);
+	spin_unlock_bh(&sk->sk_lock.slock);
+}
+
+/* Write one skb as a CPT_OBJ_SKB record: header, data bits, passed fds. */
+int cpt_dump_skb(int type, int owner, struct sk_buff *skb,
+		 struct cpt_context *ctx)
+{
+	unsigned int hspace = skb->data - skb->head;
+	struct cpt_skb_image *v;
+	loff_t saved_obj;
+	struct timeval tmptv;
+
+	/* Reject before taking the buffer or opening the object: a late
+	 * return would leave the buffer held and the object open. */
+	if (skb_shinfo(skb)->gso_type) {
+		eprintk_ctx("skb ufo is not supported\n");
+		return -EINVAL;
+	}
+
+	v = cpt_get_buf(ctx);
+	cpt_push_object(&saved_obj, ctx);
+	cpt_open_object(NULL, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SKB;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_owner = owner;
+	v->cpt_queue = type;
+	skb_get_timestamp(skb, &tmptv);
+	v->cpt_stamp = cpt_timeval_export(&tmptv);
+	v->cpt_hspace = hspace;
+	v->cpt_tspace = skb->end - skb->tail;
+	v->cpt_h = skb->h.raw - skb->head;
+	v->cpt_nh = skb->nh.raw - skb->head;
+	v->cpt_mac = skb->mac.raw - skb->head;
+	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(v->cpt_cb));
+	memcpy(v->cpt_cb, skb->cb, sizeof(v->cpt_cb));
+	if (sizeof(skb->cb) > sizeof(v->cpt_cb)) {
+		int i;
+		for (i=sizeof(v->cpt_cb); i<sizeof(skb->cb); i++) {
+			if (skb->cb[i]) {
+				wprintk_ctx("dirty skb cb");
+				break;
+			}
+		}
+	}
+	v->cpt_len = skb->len;
+	v->cpt_mac_len = skb->mac_len;
+	v->cpt_csum = skb->csum;
+	v->cpt_local_df = skb->local_df;
+	v->cpt_pkt_type = skb->pkt_type;
+	v->cpt_ip_summed = skb->ip_summed;
+	v->cpt_priority = skb->priority;
+	v->cpt_protocol = skb->protocol;
+	v->cpt_security = 0;
+	v->cpt_gso_segs = skb_shinfo(skb)->gso_segs;
+	v->cpt_gso_size = skb_shinfo(skb)->gso_size;
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	if (skb->len + hspace > 0) {
+		struct cpt_obj_bits ob;
+		loff_t saved_obj2;
+
+		cpt_push_object(&saved_obj2, ctx);
+		cpt_open_object(NULL, ctx);
+		ob.cpt_next = CPT_NULL;
+		ob.cpt_object = CPT_OBJ_BITS;
+		ob.cpt_hdrlen = sizeof(ob);
+		ob.cpt_content = CPT_CONTENT_DATA;
+		/* Use hspace, not v->cpt_hspace: v's buffer is released. */
+		ob.cpt_size = skb->len + hspace;
+		ctx->write(&ob, sizeof(ob), ctx);
+		ctx->write(skb->head, hspace + (skb->len-skb->data_len), ctx);
+		if (skb->data_len) {
+			int offset = skb->len - skb->data_len;
+			while (offset < skb->len) {
+				int copy = skb->len - offset;
+				if (copy > PAGE_SIZE)
+					copy = PAGE_SIZE;
+				(void)cpt_get_buf(ctx);
+				if (skb_copy_bits(skb, offset, ctx->tmpbuf, copy))
+					BUG();
+				ctx->write(ctx->tmpbuf, copy, ctx);
+				__cpt_release_buf(ctx);
+				offset += copy;
+			}
+		}
+
+		ctx->align(ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj2, ctx);
+	}
+
+	if (skb->sk && skb->sk->sk_family == AF_UNIX) {
+		struct scm_fp_list *fpl = UNIXCB(skb).fp;
+		if (fpl) {
+			int i;
+
+			for (i = 0; i < fpl->count; i++) {
+				struct cpt_fd_image fdi;
+				cpt_object_t *obj;
+				loff_t saved_obj2;
+
+				obj = lookup_cpt_object(CPT_OBJ_FILE, fpl->fp[i], ctx);
+				if (!obj) {
+					eprintk_ctx("lost passed FD\n");
+					return -EINVAL;
+				}
+				cpt_push_object(&saved_obj2, ctx);
+				cpt_open_object(NULL, ctx);
+				fdi.cpt_next = CPT_NULL;
+				fdi.cpt_object = CPT_OBJ_FILEDESC;
+				fdi.cpt_hdrlen = sizeof(fdi);
+				fdi.cpt_content = CPT_CONTENT_VOID;
+				fdi.cpt_fd = i;
+				fdi.cpt_file = obj->o_pos;
+				fdi.cpt_flags = 0;
+				ctx->write(&fdi, sizeof(fdi), ctx);
+				cpt_close_object(ctx);
+				cpt_pop_object(&saved_obj2, ctx);
+			}
+		}
+	}
+
+	cpt_close_object(ctx);
+	cpt_pop_object(&saved_obj, ctx);
+	return 0;
+}
+
+/* Dump sk's receive queue; AF_UNIX skbs get their sender's index as owner. */
+static int dump_rqueue(int idx, struct sock *sk, struct cpt_context *ctx)
+{
+	struct sk_buff *skb;
+	struct sock *sk_cache = NULL;	/* sender whose index idx now holds */
+
+	skb = skb_peek(&sk->sk_receive_queue);
+	while (skb && skb != (struct sk_buff*)&sk->sk_receive_queue) {
+		int err;
+
+		if (sk->sk_family == AF_UNIX) {
+			cpt_object_t *obj;
+			if (skb->sk != sk_cache) {
+				idx = -1;
+				sk_cache = NULL;
+				obj = lookup_cpt_object(CPT_OBJ_SOCKET, skb->sk, ctx);
+				if (obj) {
+					idx = obj->o_index;
+					sk_cache = skb->sk;
+				} else if (unix_peer(sk) != skb->sk)
+					goto next_skb;	/* untracked sender, not our peer */
+			}
+		}
+
+		err = cpt_dump_skb(CPT_SKB_RQ, idx, skb, ctx);
+		if (err)
+			return err;
+next_skb:
+		spin_lock_irq(&sk->sk_receive_queue.lock);
+		skb = skb->next;
+		spin_unlock_irq(&sk->sk_receive_queue.lock);
+	}
+	return 0;
+}
+
+/* Dump every skb on the socket's write queue as a CPT_SKB_WQ record. */
+static int dump_wqueue(int idx, struct sock *sk, struct cpt_context *ctx)
+{
+	struct sk_buff_head *wq = &sk->sk_write_queue;
+	struct sk_buff *skb = skb_peek(wq);
+	int err;
+
+	while (skb != NULL && skb != (struct sk_buff *)wq) {
+		if ((err = cpt_dump_skb(CPT_SKB_WQ, idx, skb, ctx)) != 0)
+			return err;
+		spin_lock_irq(&wq->lock);
+		skb = skb->next;
+		spin_unlock_irq(&wq->lock);
+	}
+	return 0;
+}
+
+/* Dump optional per-socket attributes: the attached filter and mc filters. */
+void cpt_dump_sock_attr(struct sock *sk, cpt_context_t *ctx)
+{
+	struct sk_filter *flt = sk->sk_filter;
+	struct cpt_obj_bits hdr;
+	loff_t saved_obj;
+
+	if (flt != NULL) {
+		cpt_push_object(&saved_obj, ctx);
+		cpt_open_object(NULL, ctx);
+		hdr.cpt_next = CPT_NULL;
+		hdr.cpt_object = CPT_OBJ_SKFILTER;
+		hdr.cpt_hdrlen = sizeof(hdr);
+		hdr.cpt_content = CPT_CONTENT_DATA;
+		hdr.cpt_size = flt->len * sizeof(struct sock_filter);
+		ctx->write(&hdr, sizeof(hdr), ctx);
+		ctx->write(flt->insns, hdr.cpt_size, ctx);
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) {
+		cpt_push_object(&saved_obj, ctx);
+		cpt_dump_mcfilter(sk, ctx);
+		cpt_pop_object(&saved_obj, ctx);
+	}
+}
+
+/* Dump socket content: common and per-family state, then attributes and
+ * queued skbs. Returns 0 or a negative errno. */
+int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx)
+{
+	struct cpt_sock_image *v = cpt_get_buf(ctx);
+	struct socket *sock;
+
+	cpt_open_object(obj, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_SOCKET;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_file = CPT_NULL;
+	sock = sk->sk_socket;
+	if (sock && sock->file) {
+		cpt_object_t *tobj;
+		tobj = lookup_cpt_object(CPT_OBJ_FILE, sock->file, ctx);
+		if (tobj)
+			v->cpt_file = tobj->o_pos;
+	}
+	v->cpt_index = index;
+	v->cpt_parent = parent;
+
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) {
+		if (sock && !obj->o_lock) {
+			lock_sock(sk);
+			obj->o_lock = 1;
+		}
+	}
+
+	/* Some bits stored in inode */
+	v->cpt_ssflags = sock ? sock->flags : 0;
+	v->cpt_sstate = sock ? sock->state : 0;
+	v->cpt_passcred = sock ? test_bit(SOCK_PASSCRED, &sock->flags) : 0;
+
+	/* Common data; SOCK_DBG goes to cpt_debug so cpt_zapped is kept */
+	v->cpt_family = sk->sk_family;
+	v->cpt_type = sk->sk_type;
+	v->cpt_state = sk->sk_state;
+	v->cpt_reuse = sk->sk_reuse;
+	v->cpt_zapped = sock_flag(sk, SOCK_ZAPPED);
+	v->cpt_shutdown = sk->sk_shutdown;
+	v->cpt_userlocks = sk->sk_userlocks;
+	v->cpt_no_check = sk->sk_no_check;
+	v->cpt_debug = sock_flag(sk, SOCK_DBG);
+	v->cpt_rcvtstamp = sock_flag(sk, SOCK_RCVTSTAMP);
+	v->cpt_localroute = sock_flag(sk, SOCK_LOCALROUTE);
+	v->cpt_protocol = sk->sk_protocol;
+	v->cpt_err = sk->sk_err;
+	v->cpt_err_soft = sk->sk_err_soft;
+	v->cpt_max_ack_backlog = sk->sk_max_ack_backlog;
+	v->cpt_priority = sk->sk_priority;
+	v->cpt_rcvlowat = sk->sk_rcvlowat;
+	v->cpt_rcvtimeo = CPT_NULL;
+	if (sk->sk_rcvtimeo != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_rcvtimeo = sk->sk_rcvtimeo > INT_MAX ? INT_MAX : sk->sk_rcvtimeo;
+	v->cpt_sndtimeo = CPT_NULL;
+	if (sk->sk_sndtimeo != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_sndtimeo = sk->sk_sndtimeo > INT_MAX ? INT_MAX : sk->sk_sndtimeo;
+	v->cpt_rcvbuf = sk->sk_rcvbuf;
+	v->cpt_sndbuf = sk->sk_sndbuf;
+	v->cpt_bound_dev_if = sk->sk_bound_dev_if;
+	v->cpt_flags = sk->sk_flags;
+	v->cpt_lingertime = CPT_NULL;
+	if (sk->sk_lingertime != MAX_SCHEDULE_TIMEOUT)
+		v->cpt_lingertime = sk->sk_lingertime > INT_MAX ? INT_MAX : sk->sk_lingertime;
+	v->cpt_peer_pid = sk->sk_peercred.pid;
+	v->cpt_peer_uid = sk->sk_peercred.uid;
+	v->cpt_peer_gid = sk->sk_peercred.gid;
+	v->cpt_stamp = cpt_timeval_export(&sk->sk_stamp);
+
+	v->cpt_peer = -1;
+	v->cpt_socketpair = 0;
+	v->cpt_deleted = 0;
+
+	v->cpt_laddrlen = 0;
+	if (sock) {
+		int alen = sizeof(v->cpt_laddr);
+		int err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_laddr, &alen, 0);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		v->cpt_laddrlen = alen;
+	}
+	v->cpt_raddrlen = 0;
+	if (sock) {
+		int alen = sizeof(v->cpt_raddr);
+		int err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_raddr, &alen, 2);
+		if (!err)
+			v->cpt_raddrlen = alen;
+	}
+
+	if (sk->sk_family == AF_UNIX) {
+		if (unix_sk(sk)->dentry) {
+			struct dentry *d = unix_sk(sk)->dentry;
+			v->cpt_deleted = !IS_ROOT(d) && d_unhashed(d);
+			if (!v->cpt_deleted) {
+				int err = 0;
+				char *path;
+				unsigned long pg = __get_free_page(GFP_KERNEL);
+
+				if (!pg) {
+					cpt_release_buf(ctx);
+					return -ENOMEM;
+				}
+
+				path = d_path(d, unix_sk(sk)->mnt, (char *)pg, PAGE_SIZE);
+
+				if (!IS_ERR(path)) {
+					int len = strlen(path);
+					if (len < 126) {
+						strcpy(((char*)v->cpt_laddr)+2, path);
+						v->cpt_laddrlen = len + 2;
+					} else {
+						wprintk_ctx("af_unix path is too long: %s (%s)\n", path, ((char*)v->cpt_laddr)+2);
+					}
+					err = cpt_verify_overmount(path, d, unix_sk(sk)->mnt, ctx);
+				} else {
+					eprintk_ctx("cannot get path of an af_unix socket\n");
+					err = PTR_ERR(path);
+				}
+				free_page(pg);
+				if (err) {
+					cpt_release_buf(ctx);
+					return err;
+				}
+			}
+		}
+
+		/* If the socket is connected, find its peer. If peer is not
+		 * in our table, the socket is connected to external process
+		 * and we consider it disconnected.
+		 */
+		if (unix_peer(sk)) {
+			cpt_object_t *pobj;
+			pobj = lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(sk), ctx);
+			if (pobj)
+				v->cpt_peer = pobj->o_index;
+			else
+				v->cpt_shutdown = SHUTDOWN_MASK;
+
+			if (unix_peer(unix_peer(sk)) == sk)
+				v->cpt_socketpair = 1;
+		}
+
+		/* If the socket shares address with another socket it is
+		 * child of some listening socket. Find and record it. */
+		if (unix_sk(sk)->addr &&
+		    atomic_read(&unix_sk(sk)->addr->refcnt) > 1 &&
+		    sk->sk_state != TCP_LISTEN) {
+			cpt_object_t *pobj;
+			for_each_object(pobj, CPT_OBJ_SOCKET) {
+				struct sock *psk = pobj->o_obj;
+				if (psk->sk_family == AF_UNIX &&
+				    psk->sk_state == TCP_LISTEN &&
+				    unix_sk(psk)->addr == unix_sk(sk)->addr) {
+					v->cpt_parent = pobj->o_index;
+					break;
+				}
+			}
+		}
+	}
+
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+		cpt_dump_socket_in(v, sk, ctx);
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_dump_sock_attr(sk, ctx);
+
+	dump_rqueue(index, sk, ctx);
+	if (sk->sk_family == AF_INET || sk->sk_family == AF_INET6) {
+		dump_wqueue(index, sk, ctx);
+		cpt_dump_ofo_queue(index, sk, ctx);
+	}
+
+	if ((sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+	    && sk->sk_state == TCP_LISTEN)
+		cpt_dump_synwait_queue(sk, index, ctx);
+
+	cpt_close_object(ctx);
+
+	if ((sk->sk_family == AF_INET || sk->sk_family == AF_INET6)
+	    && sk->sk_state == TCP_LISTEN)
+		cpt_dump_accept_queue(sk, index, ctx);
+
+	return 0;
+}
+
+/* Dump orphaned TCP sockets (dead, no struct socket) and tcp_done() them. */
+int cpt_dump_orphaned_sockets(struct cpt_context *ctx)
+{
+	int i;
+
+	cpt_open_section(ctx, CPT_SECT_ORPHANS);
+
+	for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
+		struct sock *sk;
+		struct hlist_node *node;
+
+retry:
+		read_lock_bh(&tcp_hashinfo.ehash[i].lock);
+		sk_for_each(sk, node, &tcp_hashinfo.ehash[i].chain) {
+			if (sk->owner_env != get_exec_env())
+				continue;	/* not owned by the current exec env */
+			if (sk->sk_socket)
+				continue;	/* still attached to a struct socket */
+			if (!sock_flag(sk, SOCK_DEAD))
+				continue;
+			if (lookup_cpt_object(CPT_OBJ_SOCKET, sk, ctx))
+				continue;	/* already in the socket table */
+			sock_hold(sk);
+			read_unlock_bh(&tcp_hashinfo.ehash[i].lock);
+			/* Set sk_lock.owner by hand for the duration of the dump. */
+			local_bh_disable();
+			bh_lock_sock(sk);
+			if (sock_owned_by_user(sk))
+				eprintk_ctx("BUG: sk locked by whom?\n");
+			sk->sk_lock.owner = (void *)1;
+			bh_unlock_sock(sk);
+			local_bh_enable();
+
+			cpt_dump_socket(NULL, sk, -1, -1, ctx);
+
+			local_bh_disable();
+			bh_lock_sock(sk);
+			sk->sk_lock.owner = NULL;
+			clear_backlog(sk);
+			tcp_done(sk);
+			bh_unlock_sock(sk);
+			local_bh_enable();
+			sock_put(sk);
+			/* The chain lock was dropped: rescan this bucket from its head. */
+			goto retry;
+		}
+		read_unlock_bh(&tcp_hashinfo.ehash[i].lock);
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+/* Returns 0 for a netlink socket whose ->cb is set, 1 for everything else. */
+static int can_dump(struct sock *sk, cpt_context_t *ctx)
+{
+	if (sk->sk_family != AF_NETLINK)
+		return 1;
+
+	if (((struct netlink_sock *)sk)->cb) {
+		eprintk_ctx("netlink socket has active callback\n");
+		return 0;
+	}
+	return 1;
+}
+
+/* We are not going to block suspend when we have external AF_UNIX connections.
+ * But we cannot stop feed of new packets/connections to our environment
+ * from outside. Taking into account that it is intrinsically unreliable,
+ * we collect some amount of data, but when checkpointing/restoring we
+ * are going to drop everything, which does not make sense: skbs sent
+ * by outside processes, connections from outside etc. etc.
+ */
+
+/* The first pass. When we see socket referenced by a file, we just
+ * add it to socket table */
+int cpt_collect_socket(struct file *file, cpt_context_t * ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	cpt_object_t *obj;
+	struct sock *sk;
+
+	if (!S_ISSOCK(ino->i_mode))
+		return -ENOTSOCK;
+
+	sk = container_of(ino, struct socket_alloc, vfs_inode)->socket.sk;
+	if (!can_dump(sk, ctx))
+		return -EAGAIN;
+	obj = cpt_object_add(CPT_OBJ_SOCKET, sk, ctx);
+	if (obj == NULL)
+		return -ENOMEM;
+	obj->o_parent = file;	/* the file through which the socket was found */
+	return 0;
+}
+
+/*
+ * We should end with table containing:
+ *  * all sockets opened by our processes in the table.
+ *  * all the sockets queued in listening queues on _our_ listening sockets,
+ *    which are connected to our opened sockets.
+ */
+
+/* Add queued senders whose peer is already in the socket table. */
+static int collect_one_unix_listening_sock(cpt_object_t *obj, cpt_context_t * ctx)
+{
+	struct sock *sk = obj->o_obj;
+	struct sk_buff_head *rq = &sk->sk_receive_queue;
+	struct sk_buff *skb = skb_peek(rq);
+	cpt_object_t *cobj;
+
+	while (skb && skb != (struct sk_buff *)rq) {
+		if (unix_peer(skb->sk) &&
+		    lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(skb->sk), ctx)) {
+			cobj = cpt_object_add(CPT_OBJ_SOCKET, skb->sk, ctx);
+			if (cobj == NULL)
+				return -ENOMEM;
+			cobj->o_parent = obj->o_parent;
+		}
+		spin_lock_irq(&rq->lock);
+		skb = skb->next;
+		spin_unlock_irq(&rq->lock);
+	}
+	return 0;
+}
+
+/* Add unaccepted AF_UNIX children to the table, then index all sockets. */
+int cpt_index_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+	unsigned long index = 0;
+	int err;
+
+	/* Collect not-yet-accepted children of listening sockets. */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+
+		if (sk->sk_state != TCP_LISTEN || sk->sk_family != AF_UNIX)
+			continue;
+		/* Propagate -ENOMEM instead of silently losing children. */
+		if ((err = collect_one_unix_listening_sock(obj, ctx)) != 0)
+			return err;
+	}
+
+	/* Assign indices to all the sockets. */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		cpt_obj_setindex(obj, index++, ctx);
+		if (sk->sk_socket && sk->sk_socket->file) {
+			cpt_object_t *tobj;
+			tobj = lookup_cpt_object(CPT_OBJ_FILE, sk->sk_socket->file, ctx);
+			if (tobj)
+				cpt_obj_setindex(tobj, obj->o_index, ctx);
+		}
+	}
+	return 0;
+}
+
+/* Release every collected socket whose object has o_lock set. */
+void cpt_unlock_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+
+		if (sk && obj->o_lock && sk->sk_socket)
+			release_sock(sk);
+	}
+}
+
+void cpt_kill_sockets(cpt_context_t * ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *sk = obj->o_obj;
+		if (sk == NULL || !obj->o_lock)
+			continue;	/* only sockets with o_lock set are killed */
+		cpt_kill_socket(sk, ctx);
+		if (sk->sk_socket)
+			release_sock_nobacklog(sk);
+	}
+}
+
+/* Return the fd recorded for `file` on its socket's fasync list, or -1. */
+__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	struct fasync_struct *fa;
+
+	fa = container_of(ino, struct socket_alloc, vfs_inode)->socket.fasync_list;
+	while (fa != NULL) {
+		if (fa->fa_file == file)
+			return fa->fa_fd;
+		fa = fa->fa_next;
+	}
+	return -1;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_socket.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_socket.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_socket.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_socket.h	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,33 @@
+struct sock;
+
+int cpt_collect_passedfds(cpt_context_t *);
+int cpt_index_sockets(cpt_context_t *);
+int cpt_collect_socket(struct file *, cpt_context_t *);
+int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx);
+int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx);
+int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx);
+int rst_sockets(struct cpt_context *ctx);
+int rst_sockets_complete(struct cpt_context *ctx);
+int cpt_dump_orphaned_sockets(struct cpt_context *ctx);
+
+int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx);
+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx);
+
+void cpt_unlock_sockets(cpt_context_t *);
+void cpt_kill_sockets(cpt_context_t *);
+
+
+int cpt_kill_socket(struct sock *, cpt_context_t *);
+int cpt_dump_socket_in(struct cpt_sock_image *, struct sock *, struct cpt_context*);
+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *, struct cpt_context *ctx);
+__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx);
+int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *);
+int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si, loff_t pos, struct cpt_context *ctx);
+int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx);
+int cpt_dump_skb(int type, int owner, struct sk_buff *skb, struct cpt_context *ctx);
+int cpt_dump_mcfilter(struct sock *sk, struct cpt_context *ctx);
+
+int rst_sk_mcfilter_in(struct sock *sk, struct cpt_sockmc_image *v,
+		       loff_t pos, cpt_context_t *ctx);
+int rst_sk_mcfilter_in6(struct sock *sk, struct cpt_sockmc_image *v,
+			loff_t pos, cpt_context_t *ctx);
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_socket_in.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_socket_in.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_socket_in.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_socket_in.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,443 @@
+/*
+ *
+ *  kernel/cpt/cpt_socket_in.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/igmp.h>
+#include <linux/ipv6.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+static inline __u32 jiffies_export(unsigned long tmo)
+{
+	/* Signed distance of tmo from the current jiffies, kept to 32 bits. */
+	return (__s32)(long)(tmo - jiffies);
+}
+
+static inline __u32 tcp_jiffies_export(__u32 tmo)
+{
+	/* Signed 32-bit offset of tmo relative to tcp_time_stamp. */
+	return (__s32)(tmo - tcp_time_stamp);
+}
+
+int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx)
+{
+	struct tcp_sock *tp;
+	struct sk_buff_head *ofo;
+	struct sk_buff *cur;
+
+	/* Nothing to dump unless this is a TCP stream socket. */
+	if (!(sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP))
+		return 0;
+
+	tp = tcp_sk(sk);
+	ofo = &tp->out_of_order_queue;
+	for (cur = skb_peek(ofo); cur && cur != (struct sk_buff *)ofo; ) {
+		int err = cpt_dump_skb(CPT_SKB_OFOQ, idx, cur, ctx);
+		if (err)
+			return err;
+
+		/* Advance to the next buffer while holding the queue lock. */
+		spin_lock_irq(&ofo->lock);
+		cur = cur->next;
+		spin_unlock_irq(&ofo->lock);
+	}
+	return 0;
+}
+
+static int cpt_dump_socket_tcp(struct cpt_sock_image *si, struct sock *sk,
+			       struct cpt_context *ctx)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	/* Copy tcp_sock and inet_connection_sock state of sk into si; always returns 0. */
+	si->cpt_pred_flags = tp->pred_flags;
+	si->cpt_rcv_nxt = tp->rcv_nxt;
+	si->cpt_snd_nxt = tp->snd_nxt;
+	si->cpt_snd_una = tp->snd_una;
+	si->cpt_snd_sml = tp->snd_sml;
+	si->cpt_rcv_tstamp = tcp_jiffies_export(tp->rcv_tstamp);
+	si->cpt_lsndtime = tcp_jiffies_export(tp->lsndtime);
+	si->cpt_tcp_header_len = tp->tcp_header_len;
+	si->cpt_ack_pending = inet_csk(sk)->icsk_ack.pending;
+	si->cpt_quick = inet_csk(sk)->icsk_ack.quick;
+	si->cpt_pingpong = inet_csk(sk)->icsk_ack.pingpong;
+	si->cpt_blocked = inet_csk(sk)->icsk_ack.blocked;
+	si->cpt_ato = inet_csk(sk)->icsk_ack.ato;
+	si->cpt_ack_timeout = jiffies_export(inet_csk(sk)->icsk_ack.timeout);
+	si->cpt_lrcvtime = tcp_jiffies_export(inet_csk(sk)->icsk_ack.lrcvtime);
+	si->cpt_last_seg_size = inet_csk(sk)->icsk_ack.last_seg_size;
+	si->cpt_rcv_mss = inet_csk(sk)->icsk_ack.rcv_mss;
+	si->cpt_snd_wl1 = tp->snd_wl1;
+	si->cpt_snd_wnd = tp->snd_wnd;
+	si->cpt_max_window = tp->max_window;
+	si->cpt_pmtu_cookie = inet_csk(sk)->icsk_pmtu_cookie;
+	si->cpt_mss_cache = tp->mss_cache;
+	si->cpt_mss_cache_std = tp->mss_cache; /* FIXME: was tp->mss_cache_std */
+	si->cpt_mss_clamp = tp->rx_opt.mss_clamp;
+	si->cpt_ext_header_len = inet_csk(sk)->icsk_ext_hdr_len;
+	si->cpt_ext2_header_len = 0;	/* no source field is read; always stored as 0 */
+	si->cpt_ca_state = inet_csk(sk)->icsk_ca_state;
+	si->cpt_retransmits = inet_csk(sk)->icsk_retransmits;
+	si->cpt_reordering = tp->reordering;
+	si->cpt_frto_counter = tp->frto_counter;
+	si->cpt_frto_highmark = tp->frto_highmark;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+	// // si->cpt_adv_cong = tp->adv_cong;
+#endif
+	si->cpt_defer_accept = inet_csk(sk)->icsk_accept_queue.rskq_defer_accept;
+	si->cpt_backoff = inet_csk(sk)->icsk_backoff;
+	si->cpt_srtt = tp->srtt;
+	si->cpt_mdev = tp->mdev;
+	si->cpt_mdev_max = tp->mdev_max;
+	si->cpt_rttvar = tp->rttvar;
+	si->cpt_rtt_seq = tp->rtt_seq;
+	si->cpt_rto = inet_csk(sk)->icsk_rto;
+	si->cpt_packets_out = tp->packets_out;
+	si->cpt_left_out = tp->left_out;
+	si->cpt_retrans_out = tp->retrans_out;
+	si->cpt_lost_out = tp->lost_out;
+	si->cpt_sacked_out = tp->sacked_out;
+	si->cpt_fackets_out = tp->fackets_out;
+	si->cpt_snd_ssthresh = tp->snd_ssthresh;
+	si->cpt_snd_cwnd = tp->snd_cwnd;
+	si->cpt_snd_cwnd_cnt = tp->snd_cwnd_cnt;
+	si->cpt_snd_cwnd_clamp = tp->snd_cwnd_clamp;
+	si->cpt_snd_cwnd_used = tp->snd_cwnd_used;
+	si->cpt_snd_cwnd_stamp = tcp_jiffies_export(tp->snd_cwnd_stamp);
+	si->cpt_timeout = jiffies_export(inet_csk(sk)->icsk_timeout);
+	si->cpt_ka_timeout = 0;	/* replaced below when SOCK_KEEPOPEN is set */
+	si->cpt_rcv_wnd = tp->rcv_wnd;
+	si->cpt_rcv_wup = tp->rcv_wup;
+	si->cpt_write_seq = tp->write_seq;
+	si->cpt_pushed_seq = tp->pushed_seq;
+	si->cpt_copied_seq = tp->copied_seq;
+	si->cpt_tstamp_ok = tp->rx_opt.tstamp_ok;
+	si->cpt_wscale_ok = tp->rx_opt.wscale_ok;
+	si->cpt_sack_ok = tp->rx_opt.sack_ok;
+	si->cpt_saw_tstamp = tp->rx_opt.saw_tstamp;
+	si->cpt_snd_wscale = tp->rx_opt.snd_wscale;
+	si->cpt_rcv_wscale = tp->rx_opt.rcv_wscale;
+	si->cpt_nonagle = tp->nonagle;
+	si->cpt_keepalive_probes = tp->keepalive_probes;
+	si->cpt_rcv_tsval = tp->rx_opt.rcv_tsval;
+	si->cpt_rcv_tsecr = tp->rx_opt.rcv_tsecr;
+	si->cpt_ts_recent = tp->rx_opt.ts_recent;
+	si->cpt_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
+	si->cpt_user_mss = tp->rx_opt.user_mss;
+	si->cpt_dsack = tp->rx_opt.dsack;
+	si->cpt_eff_sacks = tp->rx_opt.eff_sacks;
+	si->cpt_sack_array[0] = tp->duplicate_sack[0].start_seq;
+	si->cpt_sack_array[1] = tp->duplicate_sack[0].end_seq;
+	si->cpt_sack_array[2] = tp->selective_acks[0].start_seq;
+	si->cpt_sack_array[3] = tp->selective_acks[0].end_seq;
+	si->cpt_sack_array[4] = tp->selective_acks[1].start_seq;
+	si->cpt_sack_array[5] = tp->selective_acks[1].end_seq;
+	si->cpt_sack_array[6] = tp->selective_acks[2].start_seq;
+	si->cpt_sack_array[7] = tp->selective_acks[2].end_seq;
+	si->cpt_sack_array[8] = tp->selective_acks[3].start_seq;
+	si->cpt_sack_array[9] = tp->selective_acks[3].end_seq;
+	si->cpt_window_clamp = tp->window_clamp;
+	si->cpt_rcv_ssthresh = tp->rcv_ssthresh;
+	si->cpt_probes_out = inet_csk(sk)->icsk_probes_out;
+	si->cpt_num_sacks = tp->rx_opt.num_sacks;
+	si->cpt_advmss = tp->advmss;
+	si->cpt_syn_retries = inet_csk(sk)->icsk_syn_retries;
+	si->cpt_ecn_flags = tp->ecn_flags;
+	si->cpt_prior_ssthresh = tp->prior_ssthresh;
+	si->cpt_high_seq = tp->high_seq;
+	si->cpt_retrans_stamp = tp->retrans_stamp;
+	si->cpt_undo_marker = tp->undo_marker;
+	si->cpt_undo_retrans = tp->undo_retrans;
+	si->cpt_urg_seq = tp->urg_seq;
+	si->cpt_urg_data = tp->urg_data;
+	si->cpt_pending = inet_csk(sk)->icsk_pending;
+	si->cpt_urg_mode = tp->urg_mode;
+	si->cpt_snd_up = tp->snd_up;
+	si->cpt_keepalive_time = tp->keepalive_time;
+	si->cpt_keepalive_intvl = tp->keepalive_intvl;
+	si->cpt_linger2 = tp->linger2;
+	/* sk_timer expiry is exported only for open, non-listening keepalive sockets. */
+	if (sk->sk_state != TCP_LISTEN &&
+	    sk->sk_state != TCP_CLOSE &&
+	    sock_flag(sk, SOCK_KEEPOPEN)) {
+		si->cpt_ka_timeout = jiffies_export(sk->sk_timer.expires);
+	}
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	{	/* flag AF_INET6 sockets whose af_ops table is ipv6_mapped */
+		extern struct inet_connection_sock_af_ops ipv6_mapped;
+		if (sk->sk_family == AF_INET6 &&
+			inet_csk(sk)->icsk_af_ops == &ipv6_mapped)
+			si->cpt_mapped = 1;
+	}
+#endif
+
+	return 0;
+}
+
+
+int cpt_dump_socket_in(struct cpt_sock_image *si, struct sock *sk,
+		       struct cpt_context *ctx)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	/* Fill si from an inet socket; np is dereferenced only on the AF_INET6 paths. */
+	if (sk->sk_family == AF_INET) {
+		struct sockaddr_in *sin = ((struct sockaddr_in*)si->cpt_laddr);
+		sin->sin_family = AF_INET;
+		sin->sin_port = inet->sport;
+		sin->sin_addr.s_addr = inet->rcv_saddr;
+		si->cpt_laddrlen = sizeof(*sin);
+	} else if (sk->sk_family == AF_INET6) {
+		struct sockaddr_in6 *sin6 = ((struct sockaddr_in6*)si->cpt_laddr);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = inet->sport;
+		memcpy(&sin6->sin6_addr, &np->rcv_saddr, 16);
+		si->cpt_laddrlen = sizeof(*sin6);
+	}
+	if (!inet->num)
+		si->cpt_laddrlen = 0;	/* inet->num == 0: record an empty local address */
+
+	si->cpt_daddr = inet->daddr;
+	si->cpt_dport = inet->dport;
+	si->cpt_saddr = inet->saddr;
+	si->cpt_rcv_saddr = inet->rcv_saddr;
+	si->cpt_sport = inet->sport;
+	si->cpt_uc_ttl = inet->uc_ttl;
+	si->cpt_tos = inet->tos;
+	si->cpt_cmsg_flags = inet->cmsg_flags;
+	si->cpt_mc_index = inet->mc_index;
+	si->cpt_mc_addr = inet->mc_addr;
+	si->cpt_hdrincl = inet->hdrincl;
+	si->cpt_mc_ttl = inet->mc_ttl;
+	si->cpt_mc_loop = inet->mc_loop;
+	si->cpt_pmtudisc = inet->pmtudisc;
+	si->cpt_recverr = inet->recverr;
+	si->cpt_freebind = inet->freebind;
+	si->cpt_idcounter = inet->id;
+
+	si->cpt_cork_flags = inet->cork.flags;
+	si->cpt_cork_fragsize = 0;	/* set only when a cork route exists */
+	si->cpt_cork_length = inet->cork.length;
+	si->cpt_cork_addr = inet->cork.addr;
+	si->cpt_cork_saddr = inet->cork.fl.fl4_src;
+	si->cpt_cork_daddr = inet->cork.fl.fl4_dst;
+	si->cpt_cork_oif = inet->cork.fl.oif;
+	if (inet->cork.rt) {	/* the route's flow overrides the cork flow values above */
+		si->cpt_cork_fragsize = inet->cork.fragsize;
+		si->cpt_cork_saddr = inet->cork.rt->fl.fl4_src;
+		si->cpt_cork_daddr = inet->cork.rt->fl.fl4_dst;
+		si->cpt_cork_oif = inet->cork.rt->fl.oif;
+	}
+
+	if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) {
+		struct udp_sock *up = udp_sk(sk);
+		si->cpt_udp_pending  = up->pending;
+		si->cpt_udp_corkflag  = up->corkflag;
+		si->cpt_udp_encap  = up->encap_type;
+		si->cpt_udp_len  = up->len;
+	}
+
+	if (sk->sk_family == AF_INET6) {
+		memcpy(si->cpt_saddr6, &np->saddr, 16);
+		memcpy(si->cpt_rcv_saddr6, &np->rcv_saddr, 16);
+		memcpy(si->cpt_daddr6, &np->daddr, 16);
+		si->cpt_flow_label6 = np->flow_label;
+		si->cpt_frag_size6 = np->frag_size;
+		si->cpt_hop_limit6 = np->hop_limit;
+		si->cpt_mcast_hops6 = np->mcast_hops;
+		si->cpt_mcast_oif6 = np->mcast_oif;
+		si->cpt_rxopt6 = np->rxopt.all;
+		si->cpt_mc_loop6 = np->mc_loop;
+		si->cpt_recverr6 = np->recverr;
+		si->cpt_sndflow6 = np->sndflow;
+		si->cpt_pmtudisc6 = np->pmtudisc;
+		si->cpt_ipv6only6 = np->ipv6only;
+		si->cpt_mapped = 0;	/* cpt_dump_socket_tcp() may raise this to 1 */
+	}
+
+	if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP)
+		cpt_dump_socket_tcp(si, sk, ctx);	/* result ignored; it always returns 0 */
+
+	return 0;
+}
+
+int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx)
+{
+	struct request_sock *req = inet_csk(sk)->icsk_accept_queue.rskq_accept_head;
+	/* Dump each socket on the accept queue with index -1 and parent == index. */
+	for (; req; req = req->dl_next)
+		cpt_dump_socket(NULL, req->sk, -1, index, ctx);
+	return 0;
+}
+
+
+static int dump_openreq(struct request_sock *req, struct sock *sk, int index,
+			struct cpt_context *ctx)
+{
+	struct cpt_openreq_image *v = cpt_get_buf(ctx);
+	/* Write one CPT_OBJ_OPENREQ record describing the pending request req. */
+	cpt_open_object(NULL, ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_OPENREQ;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	v->cpt_rcv_isn = tcp_rsk(req)->rcv_isn;
+	v->cpt_snt_isn = tcp_rsk(req)->snt_isn;
+	v->cpt_rmt_port = inet_rsk(req)->rmt_port;
+	v->cpt_mss = req->mss;
+	v->cpt_family = req->rsk_ops->family;
+	v->cpt_retrans = req->retrans;
+	v->cpt_snd_wscale = inet_rsk(req)->snd_wscale;
+	v->cpt_rcv_wscale = inet_rsk(req)->rcv_wscale;
+	v->cpt_tstamp_ok = inet_rsk(req)->tstamp_ok;
+	v->cpt_sack_ok = inet_rsk(req)->sack_ok;
+	v->cpt_wscale_ok = inet_rsk(req)->wscale_ok;
+	v->cpt_ecn_ok = inet_rsk(req)->ecn_ok;
+	v->cpt_acked = inet_rsk(req)->acked;
+	v->cpt_window_clamp = req->window_clamp;
+	v->cpt_rcv_wnd = req->rcv_wnd;
+	v->cpt_ts_recent = req->ts_recent;
+	v->cpt_expires = jiffies_export(req->expires);
+	/* cpt_family (set above from the request's ops) selects the address layout. */
+	if (v->cpt_family == AF_INET) {
+		memcpy(v->cpt_loc_addr, &inet_rsk(req)->loc_addr, 4);
+		memcpy(v->cpt_rmt_addr, &inet_rsk(req)->rmt_addr, 4);
+	} else {
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		memcpy(v->cpt_loc_addr, &inet6_rsk(req)->loc_addr, 16);
+		memcpy(v->cpt_rmt_addr, &inet6_rsk(req)->rmt_addr, 16);
+		v->cpt_iif = inet6_rsk(req)->iif;
+#endif
+	}
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx)
+{
+	struct listen_sock *lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+	int bucket;
+	/* Visit every syn_table bucket and emit one openreq record per entry. */
+	for (bucket = 0; bucket < TCP_SYNQ_HSIZE; bucket++) {
+		struct request_sock *req = lopt->syn_table[bucket];
+		for (; req; req = req->dl_next) {
+			loff_t saved_obj;
+			cpt_push_object(&saved_obj, ctx);
+			dump_openreq(req, sk, index, ctx);
+			cpt_pop_object(&saved_obj, ctx);
+		}
+	}
+	return 0;
+}
+
+
+int cpt_kill_socket(struct sock *sk, cpt_context_t * ctx)
+{
+	if (sk->sk_state == TCP_CLOSE || sk->sk_protocol != IPPROTO_TCP)
+		return 0;	/* already closed, or not TCP */
+	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
+		return 0;	/* neither AF_INET nor AF_INET6 */
+	if (sk->sk_state == TCP_LISTEN)
+		sk->sk_prot->disconnect(sk, 0);
+	else
+		tcp_set_state(sk, TCP_CLOSE);
+	return 0;
+}
+
+int cpt_dump_mcfilter(struct sock *sk, cpt_context_t *ctx)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_mc_socklist *iml;
+	/* One CPT_OBJ_SOCK_MCADDR record per joined group, source list appended. */
+	for (iml = inet->mc_list; iml; iml = iml->next) {
+		struct cpt_sockmc_image smi;
+		int scnt = 0;	/* number of source addresses, not bytes */
+		int i;
+
+		if (iml->sflist)
+			scnt = iml->sflist->sl_count;
+		/* Each source takes 16 bytes in the image (IPv4 ones are zero-padded). */
+		smi.cpt_next = sizeof(smi) + scnt*16;
+		smi.cpt_object = CPT_OBJ_SOCK_MCADDR;
+		smi.cpt_hdrlen = sizeof(smi);
+		smi.cpt_content = CPT_CONTENT_DATA;
+
+		smi.cpt_family = AF_INET;
+		smi.cpt_mode = iml->sfmode;
+		smi.cpt_ifindex = iml->multi.imr_ifindex;
+		memset(&smi.cpt_mcaddr, 0, sizeof(smi.cpt_mcaddr));
+		smi.cpt_mcaddr[0] = iml->multi.imr_multiaddr.s_addr;
+
+		ctx->write(&smi, sizeof(smi), ctx);
+
+		for (i = 0; i < scnt; i++) {
+			u32 addr[4];
+			memset(&addr, 0, sizeof(addr));
+			addr[0] = iml->sflist->sl_addr[i];
+			ctx->write(&addr, sizeof(addr), ctx);
+		}
+	}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_mc_socklist *mcl;
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		for (mcl = np->ipv6_mc_list; mcl; mcl = mcl->next) {
+			struct cpt_sockmc_image smi;
+			int scnt = 0;	/* number of source addresses, not bytes */
+			int i;
+
+			if (mcl->sflist)
+				scnt = mcl->sflist->sl_count;
+			/* Payload is scnt 16-byte addresses, matching the loop below. */
+			smi.cpt_next = sizeof(smi) + scnt*16;
+			smi.cpt_object = CPT_OBJ_SOCK_MCADDR;
+			smi.cpt_hdrlen = sizeof(smi);
+			smi.cpt_content = CPT_CONTENT_DATA;
+
+			smi.cpt_family = AF_INET6;
+			smi.cpt_mode = mcl->sfmode;
+			smi.cpt_ifindex = mcl->ifindex;
+			memcpy(&smi.cpt_mcaddr, &mcl->addr, sizeof(smi.cpt_mcaddr));
+
+			ctx->write(&smi, sizeof(smi), ctx);
+			for (i = 0; i < scnt; i++)
+				ctx->write(&mcl->sflist->sl_addr[i], 16, ctx);
+		}
+	}
+#endif
+	return 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_syscalls.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_syscalls.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_syscalls.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_syscalls.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,100 @@
+#include <linux/unistd.h>
+#include <linux/syscalls.h>
+#include <asm/uaccess.h>
+
+#define WRAP(c, args) return sys_##c args	/* body: return sys_<c>(args) */
+#define WRAP2(c, args) int err; mm_segment_t oldfs; \
+	               oldfs = get_fs(); set_fs(KERNEL_DS); \
+                       err = sys_##c args ;\
+                       set_fs(oldfs); \
+                       return err	/* body: sys_<c>(args) under KERNEL_DS, old segment restored */
+
+static inline int sc_close(int fd)
+{
+	WRAP(close, (fd));	/* returns sys_close(fd) */
+}
+
+static inline int sc_dup2(int fd1, int fd2)
+{
+	WRAP(dup2, (fd1, fd2));	/* returns sys_dup2(fd1, fd2) */
+}
+
+static inline int sc_unlink(char *name)
+{
+	WRAP2(unlink, (name));	/* sys_unlink() with the segment set to KERNEL_DS */
+}
+
+static inline int sc_pipe(int *pfd)
+{
+	return do_pipe(pfd);	/* calls do_pipe() directly, not a sys_* entry */
+}
+
+static inline int sc_mknod(char *name, int mode, int dev)
+{
+	WRAP2(mknod, (name, mode, dev));	/* sys_mknod() with the segment set to KERNEL_DS */
+}
+
+static inline int sc_chmod(char *name, int mode)
+{
+	WRAP2(chmod, (name, mode));	/* sys_chmod() under KERNEL_DS; was sys_mkdir by mistake */
+}
+
+static inline int sc_chown(char *name, int uid, int gid)
+{
+	WRAP2(chown, (name, uid, gid));	/* sys_chown() with the segment set to KERNEL_DS */
+}
+
+static inline int sc_mkdir(char *name, int mode)
+{
+	WRAP2(mkdir, (name, mode));	/* sys_mkdir() with the segment set to KERNEL_DS */
+}
+
+static inline int sc_rmdir(char *name)
+{
+	WRAP2(rmdir, (name));	/* sys_rmdir() with the segment set to KERNEL_DS */
+}
+
+static inline int sc_mount(char *mntdev, char *mntpnt, char *type, unsigned long flags)
+{
+	WRAP2(mount, (mntdev ? : "none", mntpnt, type, flags, NULL));	/* NULL mntdev becomes "none"; data argument is NULL */
+}
+
+static inline int sc_mprotect(unsigned long start, size_t len,
+			      unsigned long prot)
+{
+	WRAP(mprotect, (start, len, prot));	/* returns sys_mprotect(start, len, prot) */
+}
+
+static inline int sc_mlock(unsigned long start, size_t len)
+{
+	WRAP(mlock, (start, len));	/* returns sys_mlock(start, len) */
+}
+
+static inline int sc_munlock(unsigned long start, size_t len)
+{
+	WRAP(munlock, (start, len));	/* returns sys_munlock(start, len) */
+}
+
+static inline int sc_remap_file_pages(unsigned long start, size_t len,
+				      unsigned long prot, unsigned long pgoff,
+				      unsigned long flags)
+{
+	WRAP(remap_file_pages, (start, len, prot, pgoff, flags));	/* arguments forwarded unchanged */
+}
+
+static inline int sc_waitx(int pid, int opt, int *stat_addr)
+{
+	WRAP(wait4, (pid, stat_addr, opt, NULL));	/* NOTE(review): no KERNEL_DS switch although stat_addr is a pointer - confirm callers */
+}
+
+static inline int sc_flock(int fd, int flags)
+{
+	WRAP(flock, (fd, flags));	/* returns sys_flock(fd, flags) */
+}
+
+static inline int sc_open(char* path, int flags, int mode)
+{
+	WRAP(open, (path, flags, mode));	/* NOTE(review): no KERNEL_DS switch, unlike sc_unlink() etc. - confirm where path lives */
+}
+
+extern int sc_execve(char *cms, char **argv, char **env);
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_sysvipc.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_sysvipc.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_sysvipc.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_sysvipc.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,317 @@
+/*
+ *
+ *  kernel/cpt/cpt_sysvipc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/shm.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+
+struct _warg {
+		struct file			*file;	/* shm_file to look for */
+		struct cpt_sysvshm_image	*v;	/* image filled on a match */
+};
+
+static int dump_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	struct _warg *warg = arg;
+	struct cpt_sysvshm_image *v = (struct cpt_sysvshm_image *)warg->v;
+	/* Walker callback: ignore segments whose shm_file is not the wanted file. */
+	if (shp->shm_file != warg->file)
+		return 0;
+
+	v->cpt_key = shp->shm_perm.key;
+	v->cpt_uid = shp->shm_perm.uid;
+	v->cpt_gid = shp->shm_perm.gid;
+	v->cpt_cuid = shp->shm_perm.cuid;
+	v->cpt_cgid = shp->shm_perm.cgid;
+	v->cpt_mode = shp->shm_perm.mode;
+	v->cpt_seq = shp->shm_perm.seq;
+
+	v->cpt_id = shp->id;
+	v->cpt_segsz = shp->shm_segsz;
+	v->cpt_atime = shp->shm_atim;
+	v->cpt_ctime = shp->shm_ctim;
+	v->cpt_dtime = shp->shm_dtim;
+	v->cpt_creator = shp->shm_cprid;
+	v->cpt_last = shp->shm_lprid;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+	v->cpt_mlockuser = shp->mlock_user ? shp->mlock_user->uid : -1;
+#else
+	v->cpt_mlockuser = -1;
+#endif
+	return 1;	/* non-zero: the image has been filled */
+}
+
+int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx)
+{
+	struct cpt_sysvshm_image *img = cpt_get_buf(ctx);
+	struct _warg warg = { .file = file, .v = img };
+	int err = 0;
+
+	/* Header of a fixed-size record: cpt_next equals the header length. */
+	img->cpt_next = sizeof(*img);
+	img->cpt_object = CPT_OBJ_SYSV_SHM;
+	img->cpt_hdrlen = sizeof(*img);
+	img->cpt_content = CPT_CONTENT_VOID;
+
+	/* A zero walk result means no segment matched the file. */
+	if (sysvipc_walk_shm(dump_one_shm, &warg) != 0)
+		ctx->write(img, sizeof(*img), ctx);
+	else
+		err = -ESRCH;
+
+	cpt_release_buf(ctx);
+	return err;
+}
+
+
+int match_sem(int id, struct sem_array *sema, void *arg)
+{
+	/* arg carries the wanted id; a hit yields nsems + 1 so it is never 0. */
+	unsigned long wanted = (unsigned long)arg;
+	return id != wanted ? 0 : sema->sem_nsems + 1;
+}
+
+static int get_sem_nsem(int id, cpt_context_t *ctx)
+{
+	int found = sysvipc_walk_sem(match_sem, (void*)(unsigned long)id);
+	if (found <= 0) {	/* match_sem() yields nsems + 1 on a hit */
+		eprintk_ctx("get_sem_nsem: SYSV semaphore %d not found\n", id);
+		return -ESRCH;
+	}
+	return found - 1;
+}
+
+static int dump_one_semundo(struct sem_undo *su, struct cpt_context *ctx)
+{
+	struct cpt_sysvsem_undo_image v;
+	loff_t saved_obj;
+	/* Write one undo record: a header followed by the semadj array. */
+	cpt_open_object(NULL, ctx);
+
+	v.cpt_next = CPT_NULL;
+	v.cpt_object = CPT_OBJ_SYSVSEM_UNDO_REC;
+	v.cpt_hdrlen = sizeof(v);
+	v.cpt_content = CPT_CONTENT_SEMUNDO;
+	v.cpt_id = su->semid;
+	v.cpt_nsem = get_sem_nsem(su->semid, ctx);
+	if ((int)v.cpt_nsem < 0)
+		return -ESRCH;	/* NOTE(review): returns without cpt_close_object() */
+
+	ctx->write(&v, sizeof(v), ctx);
+	/* semadj is written as cpt_nsem entries of sizeof(short) each. */
+	cpt_push_object(&saved_obj, ctx);
+	ctx->write(su->semadj, v.cpt_nsem*sizeof(short), ctx);
+	cpt_pop_object(&saved_obj, ctx);
+
+	cpt_close_object(ctx);
+	return 0;
+}
+
+struct sem_warg {
+	int				last_id;	/* id % IPCMNI of the last dumped set; -1 before the first */
+	struct cpt_sysvsem_image	*v;	/* buffer the image is built in */
+};
+
+static int dump_one_sem(int id, struct sem_array *sma, void *arg)
+{
+	struct sem_warg * warg = (struct sem_warg *)arg;
+	struct cpt_sysvsem_image *v = warg->v;
+	int i;
+	/* Resume point: skip sets whose slot (id % IPCMNI) is not past last_id. */
+	if (warg->last_id != -1) {
+		if ((id % IPCMNI) <= warg->last_id)
+			return 0;
+	}
+
+	v->cpt_next = sizeof(*v);
+	v->cpt_object = CPT_OBJ_SYSV_SEM;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_SEMARRAY;
+
+	v->cpt_key = sma->sem_perm.key;
+	v->cpt_uid = sma->sem_perm.uid;
+	v->cpt_gid = sma->sem_perm.gid;
+	v->cpt_cuid = sma->sem_perm.cuid;
+	v->cpt_cgid = sma->sem_perm.cgid;
+	v->cpt_mode = sma->sem_perm.mode;
+	v->cpt_seq = sma->sem_perm.seq;
+
+	v->cpt_id = id;
+	v->cpt_ctime = sma->sem_ctime;
+	v->cpt_otime = sma->sem_otime;
+	/* Append one {semval, sempid} pair per semaphore; cpt_next tracks the end. */
+	for (i=0; i<sma->sem_nsems; i++) {
+		struct {
+			__u32 semval;
+			__u32 sempid;
+		} *s = (void*)v + v->cpt_next;
+		if (v->cpt_next >= PAGE_SIZE - sizeof(*s))
+			return -EINVAL;	/* presumably the buffer is one page - confirm */
+		s->semval = sma->sem_base[i].semval;
+		s->sempid = sma->sem_base[i].sempid;
+		v->cpt_next += sizeof(*s);
+	}
+
+	warg->last_id = id % IPCMNI;
+	return 1;	/* positive: the caller writes the image and walks again */
+}
+
+
+int cpt_dump_sysvsem(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	struct sem_warg warg;
+
+	/* We cannot write to the dump file under the lock inside
+	 * sysvipc_walk_sem(), so each walk fills one image (resuming after
+	 * warg.last_id) and the image is written once the walk returns. */
+	cpt_open_section(ctx, CPT_SECT_SYSV_SEM);
+	warg.last_id = -1;
+	warg.v = cpt_get_buf(ctx);
+	for (;;) {
+		if (sysvipc_walk_sem(dump_one_sem, &warg) <= 0)
+			break;	/* NOTE(review): a negative result (e.g. -EINVAL) also ends the loop silently */
+		ctx->write(warg.v, warg.v->cpt_next, ctx);
+	}
+	cpt_release_buf(ctx);
+	cpt_close_section(ctx);
+	/* Second section: one array object per collected undo list. */
+	cpt_open_section(ctx, CPT_SECT_SYSVSEM_UNDO);
+	for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) {
+		struct sem_undo_list *semu = obj->o_obj;
+		struct sem_undo *su;
+		struct cpt_object_hdr v;
+		loff_t saved_obj;
+
+		cpt_open_object(obj, ctx);
+
+		v.cpt_next = CPT_NULL;
+		v.cpt_object = CPT_OBJ_SYSVSEM_UNDO;
+		v.cpt_hdrlen = sizeof(v);
+		v.cpt_content = CPT_CONTENT_ARRAY;
+
+		ctx->write(&v, sizeof(v), ctx);
+
+		cpt_push_object(&saved_obj, ctx);
+		for (su = semu->proc_list; su; su = su->proc_next) {
+			if (su->semid != -1) {	/* entries with semid == -1 are skipped */
+				int err;
+				err = dump_one_semundo(su, ctx);
+				if (err < 0)
+					return err;	/* NOTE(review): object and section stay open */
+			}
+		}
+		cpt_pop_object(&saved_obj, ctx);
+
+		cpt_close_object(ctx);
+	}
+	cpt_close_section(ctx);
+	return 0;
+}
+
+static int collect_one_msg(int id, struct msg_queue *msq, void *arg)
+{
+	int *counter = arg;	/* running count of message queues visited */
+	*counter += 1;
+	return 0;
+}
+
+int cpt_collect_sysvmsg(cpt_context_t * ctx)
+{
+	int nqueues = 0;
+	/* Count the message queues; any at all is reported as -EBUSY. */
+	sysvipc_walk_msg(collect_one_msg, &nqueues);
+	if (nqueues == 0)
+		return 0;
+	eprintk_ctx("SYSV msgqueues are not supported, found %d\n", nqueues);
+	return -EBUSY;
+}
+
+static int cpt_collect_sysvsem_undo(cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+	/* Pass 1: register the undo list of every task that has not exited. */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		if (tsk->exit_state) {
+			/* ipc/sem.c forgets to clear tsk->sysvsem.undo_list
+			 * on exit. Grrr... */
+			continue;
+		}
+		if (tsk->sysvsem.undo_list &&
+		    cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, tsk->sysvsem.undo_list, ctx) == NULL)
+			return -ENOMEM;
+	}
+	/* Pass 2: fail if a list's refcnt differs from the collected o_count. */
+	for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) {
+		struct sem_undo_list *semu = obj->o_obj;
+
+		if (atomic_read(&semu->refcnt) != obj->o_count) {
+			eprintk_ctx("sem_undo_list is referenced outside %d %d\n", obj->o_count, atomic_read(&semu->refcnt));
+			return -EBUSY;
+		}
+	}
+	return 0;
+}
+
+static int collect_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	cpt_context_t *ctx = arg;
+	struct file *backing = shp->shm_file;
+
+	/* Register the segment's file object; a NULL result means -ENOMEM. */
+	return __cpt_object_add(CPT_OBJ_FILE, backing, GFP_ATOMIC, ctx) ? 0 : -ENOMEM;
+}
+
+int cpt_collect_sysvshm(cpt_context_t * ctx)
+{
+	int rc = sysvipc_walk_shm(collect_one_shm, ctx);
+	/* Only negative walk results are errors; anything else is success. */
+	if (rc >= 0)
+		return 0;
+	return rc;
+}
+
+int cpt_collect_sysv(cpt_context_t * ctx)
+{
+	int err;
+
+	/*
+	 * Run the SysV IPC collectors in a fixed order (semaphore undo
+	 * lists, message queues, shared memory) and stop at the first
+	 * one that returns a non-zero result.
+	 */
+	err = cpt_collect_sysvsem_undo(ctx);
+	if (!err)
+		err = cpt_collect_sysvmsg(ctx);
+	if (!err)
+		err = cpt_collect_sysvshm(ctx);
+	return err;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_tty.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_tty.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_tty.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_tty.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,216 @@
+/*
+ *
+ *  kernel/cpt/cpt_tty.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/tty.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* We must support at least N_TTY. */
+
+int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx)
+{
+	struct tty_struct *tty = file->private_data;
+	cpt_object_t *tty_obj = lookup_cpt_object(CPT_OBJ_TTY, tty, ctx);
+	struct cpt_obj_ref ref;
+	loff_t saved_pos;
+
+	if (tty_obj == NULL)
+		return -EINVAL;
+
+	/* The content is a CPT_OBJ_REF holding the tty object's position. */
+	cpt_push_object(&saved_pos, ctx);
+
+	ref.cpt_next = sizeof(ref);
+	ref.cpt_object = CPT_OBJ_REF;
+	ref.cpt_hdrlen = sizeof(ref);
+	ref.cpt_content = CPT_CONTENT_VOID;
+	ref.cpt_pos = tty_obj->o_pos;
+
+	ctx->write(&ref, sizeof(ref), ctx);
+	cpt_pop_object(&saved_pos, ctx);
+
+	return 0;
+}
+
+int cpt_collect_tty(struct file *file, cpt_context_t * ctx)
+{
+	struct tty_struct *tty = file->private_data;
+	cpt_object_t *peer;
+
+	if (tty == NULL)
+		return 0;
+	if (cpt_object_add(CPT_OBJ_TTY, tty, ctx) == NULL)
+		return -ENOMEM;
+	if (tty->link == NULL)
+		return 0;
+
+	peer = cpt_object_add(CPT_OBJ_TTY, tty->link, ctx);
+	if (peer == NULL)
+		return -ENOMEM;
+	/* Undo o_count, tty->link is not a reference */
+	peer->o_count--;
+	return 0;
+}
+
+int cpt_dump_tty(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct tty_struct *tty = obj->o_obj;
+	struct cpt_tty_image *v;
+	/* A linked tty needs its peer collected too, and counts must match tty->count. */
+	if (tty->link) {
+		if (lookup_cpt_object(CPT_OBJ_TTY, tty->link, ctx) == NULL) {
+			eprintk_ctx("orphan pty %s %d\n", tty->name, tty->driver->subtype == PTY_TYPE_SLAVE);
+			return -EINVAL;
+		}
+		if (tty->link->link != tty) {
+			eprintk_ctx("bad pty pair\n");
+			return -EINVAL;
+		}
+		if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+		    tty->driver->subtype == PTY_TYPE_SLAVE &&
+		    tty->link->count)
+			obj->o_count++;	/* NOTE(review): presumably accounts for the open master's hold on the slave - confirm */
+	}
+	if (obj->o_count != tty->count) {
+		eprintk_ctx("tty %s is referenced outside %d %d\n", tty->name, obj->o_count, tty->count);
+		return -EBUSY;
+	}
+
+	cpt_open_object(obj, ctx);
+
+	v = cpt_get_buf(ctx);
+	v->cpt_next = -1;
+	v->cpt_object = CPT_OBJ_TTY;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_ARRAY;
+
+	v->cpt_index = tty->index;
+	v->cpt_link = -1;	/* -1 when there is no linked tty */
+	if (tty->link)
+		v->cpt_link = tty->link->index;
+	v->cpt_drv_type = tty->driver->type;
+	v->cpt_drv_subtype = tty->driver->subtype;
+	v->cpt_drv_flags = tty->driver->flags;
+	v->cpt_packet = tty->packet;
+	v->cpt_stopped = tty->stopped;
+	v->cpt_hw_stopped = tty->hw_stopped;
+	v->cpt_flow_stopped = tty->flow_stopped;
+	v->cpt_flags = tty->flags;
+	v->cpt_ctrl_status = tty->ctrl_status;
+	v->cpt_canon_data = tty->canon_data;
+	v->cpt_canon_head = tty->canon_head - tty->read_tail;	/* stored relative to read_tail */
+	v->cpt_canon_column = tty->canon_column;
+	v->cpt_column = tty->column;
+	v->cpt_erasing = tty->erasing;
+	v->cpt_lnext = tty->lnext;
+	v->cpt_icanon = tty->icanon;
+	v->cpt_raw = tty->raw;
+	v->cpt_real_raw = tty->real_raw;
+	v->cpt_closing = tty->closing;
+	v->cpt_minimum_to_wake = tty->minimum_to_wake;
+	v->cpt_pgrp = 0;
+	if (tty->pgrp > 0) {
+		v->cpt_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, tty->pgrp);
+		if ((int)v->cpt_pgrp < 0) {
+			dprintk_ctx("cannot map tty->pgrp %d -> %d\n", tty->pgrp, (int)v->cpt_pgrp);
+			v->cpt_pgrp = -1;	/* an unmappable pgrp is tolerated and stored as -1 */
+		}
+	}
+	v->cpt_session = 0;
+	if (tty->session > 0) {
+		v->cpt_session = _pid_type_to_vpid(PIDTYPE_SID, tty->session);
+		if ((int)v->cpt_session < 0) {	/* unlike pgrp, an unmappable session is fatal */
+			eprintk_ctx("cannot map tty->session %d -> %d\n", tty->session, (int)v->cpt_session);
+			cpt_release_buf(ctx);
+			return -EINVAL;	/* NOTE(review): returns without cpt_close_object() */
+		}
+	}
+	memcpy(v->cpt_name, tty->name, 64);
+	v->cpt_ws_row = tty->winsize.ws_row;
+	v->cpt_ws_col = tty->winsize.ws_col;
+	v->cpt_ws_prow = tty->winsize.ws_ypixel;
+	v->cpt_ws_pcol = tty->winsize.ws_xpixel;
+	if (tty->termios == NULL) {
+		eprintk_ctx("NULL termios");
+		cpt_release_buf(ctx);
+		return -EINVAL;	/* NOTE(review): returns without cpt_close_object() */
+	}
+	v->cpt_c_line = tty->termios->c_line;
+	v->cpt_c_iflag = tty->termios->c_iflag;
+	v->cpt_c_oflag = tty->termios->c_oflag;
+	v->cpt_c_cflag = tty->termios->c_cflag;
+	v->cpt_c_lflag = tty->termios->c_lflag;
+	memcpy(v->cpt_c_cc, tty->termios->c_cc, NCCS);
+	if (NCCS < 32)
+		memset(v->cpt_c_cc + NCCS, 255, 32 - NCCS);	/* fill the slots past NCCS, up to 32, with 255 */
+	memcpy(v->cpt_read_flags, tty->read_flags, sizeof(v->cpt_read_flags));
+
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_release_buf(ctx);
+	/* Pending input, if any, follows as a nested CPT_OBJ_BITS object. */
+	if (tty->read_buf && tty->read_cnt) {
+		struct cpt_obj_bits *v = cpt_get_buf(ctx);
+		loff_t saved_pos;
+
+		cpt_push_object(&saved_pos, ctx);
+		cpt_open_object(NULL, ctx);
+		v->cpt_next = CPT_NULL;
+		v->cpt_object = CPT_OBJ_BITS;
+		v->cpt_hdrlen = sizeof(*v);
+		v->cpt_content = CPT_CONTENT_DATA;
+		v->cpt_size = tty->read_cnt;
+		ctx->write(v, sizeof(*v), ctx);
+		cpt_release_buf(ctx);
+		/* Write from read_tail to the buffer end, then the wrapped rest from the start. */
+		if (tty->read_cnt) {	/* already guaranteed by the enclosing if */
+			int n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail);
+			ctx->write(tty->read_buf + tty->read_tail, n, ctx);
+			if (tty->read_cnt > n)
+				ctx->write(tty->read_buf, tty->read_cnt-n, ctx);
+			ctx->align(ctx);
+		}
+
+		cpt_close_object(ctx);
+		cpt_pop_object(&saved_pos, ctx);
+	}
+
+	cpt_close_object(ctx);
+
+	return 0;
+}
+
+__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx)
+{
+	struct tty_struct *tty = (struct tty_struct *)file->private_data;
+	struct fasync_struct *fa = tty->fasync;
+
+	/* Return the fd of the fasync entry registered for this file, else -1. */
+	while (fa != NULL) {
+		if (fa->fa_file == file)
+			return fa->fa_fd;
+		fa = fa->fa_next;
+	}
+	return -1;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_ubc.c linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_ubc.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_ubc.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_ubc.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,132 @@
+/*
+ *
+ *  kernel/cpt/cpt_ubc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/types.h>
+#include <ub/beancounter.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx)
+{
+	/* Register @bc as a CPT_OBJ_UBC object, then (recursively) its parent. */
+	cpt_object_t *entry = cpt_object_add(CPT_OBJ_UBC, bc, ctx);
+
+	if (entry == NULL)
+		return NULL;
+	if (entry->o_count == 1)	/* presumably the first add of this bc */
+		get_beancounter(bc);
+	if (entry->o_parent == NULL && bc->parent != NULL)
+		entry->o_parent = cpt_add_ubc(bc->parent, ctx);
+	return entry;
+}
+
+__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx)
+{
+	/* Map @bc to the image position recorded for its CPT_OBJ_UBC object. */
+	char uid_str[48];
+	cpt_object_t *entry = lookup_cpt_object(CPT_OBJ_UBC, bc, ctx);
+
+	if (entry != NULL)
+		return entry->o_pos;
+	/* Not registered: log its uid and a backtrace, report CPT_NULL. */
+	print_ub_uid(bc, uid_str, sizeof(uid_str));
+	eprintk("CPT: unknown ub %s (%p)\n", uid_str, bc);
+	dump_stack();
+	return CPT_NULL;
+}
+
+static void dump_one_bc_parm(struct cpt_ubparm *dmp, struct ubparm *prm,
+		int held)
+{	/* Fill image record @dmp from the beancounter parameter @prm. */
+	dmp->barrier = (prm->barrier < UB_MAXVALUE ? prm->barrier : CPT_NULL);	/* >= UB_MAXVALUE is stored as CPT_NULL */
+	dmp->limit = (prm->limit < UB_MAXVALUE ? prm->limit : CPT_NULL);
+	dmp->held = (held ? prm->held : CPT_NULL);	/* saved only when @held is non-zero */
+	dmp->maxheld = prm->maxheld;
+	dmp->minheld = prm->minheld;
+	dmp->failcnt = prm->failcnt;
+}
+
+static int dump_one_bc(cpt_object_t *obj, struct cpt_context *ctx)
+{	/* Write the CPT_OBJ_UBC image of the beancounter held by @obj. */
+	struct user_beancounter *bc;
+	struct cpt_beancounter_image *v;
+	int i;
+
+	bc = obj->o_obj;
+	v = cpt_get_buf(ctx);
+
+	v->cpt_next = CPT_NULL;
+	v->cpt_object = CPT_OBJ_UBC;
+	v->cpt_hdrlen = sizeof(*v);
+	v->cpt_content = CPT_CONTENT_VOID;
+
+	if (obj->o_parent != NULL)
+		v->cpt_parent = ((cpt_object_t *)obj->o_parent)->o_pos;	/* image position of the parent */
+	else
+		v->cpt_parent = CPT_NULL;
+	v->cpt_id = (obj->o_parent != NULL) ? bc->ub_uid : 0;	/* a parentless beancounter is saved with id 0 */
+	for (i = 0; i < UB_RESOURCES; i++) {
+		dump_one_bc_parm(v->cpt_parms + i * 2, bc->ub_parms + i, 0);	/* even slot: ub_parms, held omitted */
+		dump_one_bc_parm(v->cpt_parms + i * 2 + 1, bc->ub_store + i, 1);	/* odd slot: ub_store, held kept */
+	}
+	memset(v->cpt_parms + UB_RESOURCES * 2, 0,	/* zero the unused tail of cpt_parms[] */
+			sizeof(v->cpt_parms)
+				- UB_RESOURCES * 2 * sizeof(v->cpt_parms[0]));
+
+	cpt_open_object(obj, ctx);
+	ctx->write(v, sizeof(*v), ctx);
+	cpt_close_object(ctx);
+
+	cpt_release_buf(ctx);
+	return 0;	/* always 0; the write result is not checked */
+}
+
+int cpt_dump_ubc(struct cpt_context *ctx)
+{	/* Write the CPT_SECT_UBC section, each beancounter after its parent. */
+	cpt_object_t *obj;
+	int skipped;	/* objects postponed in this pass: parent has no position yet */
+	int top;	/* objects without a parent seen in this pass */
+
+	cpt_open_section(ctx, CPT_SECT_UBC);
+
+	do {
+		skipped = 0;
+		top = 0;
+		for_each_object(obj, CPT_OBJ_UBC) {
+			if (obj->o_parent == NULL)
+				top++;
+			if (obj->o_pos != CPT_NULL)
+				continue;	/* position already set: skip */
+			if (obj->o_parent != NULL &&
+			    ((cpt_object_t *)obj->o_parent)->o_pos == CPT_NULL)
+				skipped++;
+			else
+				dump_one_bc(obj, ctx);
+		}
+	} while (skipped && (top < 2));	/* repeat while something was postponed */
+
+	cpt_close_section(ctx);
+	if (top > 1) {	/* checked only after the section has been closed */
+		eprintk_ctx("More than one top level ub exist");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void cpt_finish_ubc(struct cpt_context *ctx)
+{
+	cpt_object_t *entry;	/* cursor over the CPT_OBJ_UBC objects */
+
+	for_each_object(entry, CPT_OBJ_UBC)
+		put_beancounter(entry->o_obj);	/* one put per listed object */
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_ubc.h linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_ubc.h
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_ubc.h	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_ubc.h	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,22 @@
+#ifdef CONFIG_USER_RESOURCE
+cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx);
+__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx);
+int cpt_dump_ubc(struct cpt_context *ctx);
+
+struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx);
+int rst_undump_ubc(struct cpt_context *ctx);
+
+void cpt_finish_ubc(struct cpt_context *ctx);
+void rst_finish_ubc(struct cpt_context *ctx);
+void copy_one_ubparm(struct ubparm *from, struct ubparm *to, int bc_parm_id);
+#else	/* !CONFIG_USER_RESOURCE: dump/undump/finish become no-ops */
+static inline int cpt_dump_ubc(struct cpt_context *ctx)
+{ return 0; }
+static inline int rst_undump_ubc(struct cpt_context *ctx)
+{ return 0; }
+static inline void cpt_finish_ubc(struct cpt_context *ctx)
+{ }
+static inline void rst_finish_ubc(struct cpt_context *ctx)
+{ }
+#endif	/* CONFIG_USER_RESOURCE */
+
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_x8664.S linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_x8664.S
--- linux-2.6.16.46-0.12.orig/kernel/cpt/cpt_x8664.S	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/cpt_x8664.S	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,62 @@
+#define ASSEMBLY 1
+#include <linux/config.h>
+
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/dwarf2.h>
+#include <asm/calling.h>
+#include <asm/msr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/hw_irq.h>
+#include <asm/errno.h>
+
+	.code64
+
+	.macro FAKE_STACK_FRAME child_rip
+	/* Build a fake frame: push ss, rsp, eflags, cs, rip, then orig rax */
+	xorq %rax, %rax	/* rax = 0, the value pushed for ss, rsp and orig rax */
+	pushq %rax /* ss */
+	pushq %rax /* rsp */
+	pushq $(1<<9) /* eflags - interrupts on */
+	pushq $__KERNEL_CS /* cs */
+	pushq \child_rip /* rip */
+	pushq	%rax /* orig rax */
+	.endm
+
+	.macro UNFAKE_STACK_FRAME
+	addq $8*6, %rsp	/* drop the six quadwords pushed by FAKE_STACK_FRAME */
+	.endm
+
+ENTRY(asm_kernel_thread)
+	FAKE_STACK_FRAME $child_rip	/* the saved rip of the frame is child_rip */
+	SAVE_ALL
+
+	# rdi: flags, rsi: usp, rdx: will be &pt_regs
+	movq %rdx,%rdi	/* 1st argument: the value passed in rdx */
+	orq  $0x00800000,%rdi	/* force bit 0x00800000 on (presumably CLONE_UNTRACED - confirm) */
+	movq $-1, %rsi	/* 2nd argument: -1 */
+	movq %rsp, %rdx	/* 3rd argument: the frame just built on the stack */
+
+	xorl %r8d,%r8d	/* 5th argument: 0 */
+	xorl %r9d,%r9d	/* 6th argument: 0 */
+	pushq %rcx	/* incoming rcx is additionally passed on the stack */
+	call do_fork_pid
+	addq $8, %rsp	/* pop the stack argument again */
+	/* store do_fork_pid's return value in the frame's RAX slot */
+	movq %rax,RAX(%rsp)
+	xorl %edi,%edi
+	RESTORE_ALL
+	UNFAKE_STACK_FRAME
+	ret
+
+child_rip:
+	pushq $0		# fake return address
+	movq %rdi, %rax	/* rdi holds the address of the function to call */
+	movq %rsi, %rdi	/* rsi becomes that function's first argument */
+	call *%rax
+	movq %rax, %rdi	/* the function's return value is handed to do_exit */
+	call do_exit
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_conntrack.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_conntrack.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_conntrack.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_conntrack.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,286 @@
+/*
+ *
+ *  kernel/cpt/rst_conntrack.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/unistd.h>
+#include <linux/ve.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+
+#if defined(CONFIG_VE_IPTABLES) && \
+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+
+#define ASSERT_READ_LOCK(x) do { } while (0)
+#define ASSERT_WRITE_LOCK(x) do { } while (0)
+
+#include <linux/netfilter_ipv4/listhelp.h>
+
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+struct ct_holder
+{	/* One restored conntrack, kept on a list while the section is read. */
+	struct ct_holder *next;	/* next holder in the singly linked list */
+	struct ip_conntrack *ct;	/* the conntrack created for this image */
+	int index;	/* cpt_index of the conntrack in the image */
+};
+
+static void decode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple, int dir)
+{
+	tuple->src.ip = v->cpt_src;
+	tuple->src.u.all = v->cpt_srcport;
+	tuple->dst.ip = v->cpt_dst;
+	tuple->dst.u.all = v->cpt_dstport;
+	tuple->dst.protonum = v->cpt_protonum;
+	tuple->dst.dir = v->cpt_dir;
+	/* @dir is the expected direction; a mismatch is only warned about. */
+	if (tuple->dst.dir != dir)
+		wprintk("dir != tuple->dst.dir\n");
+}
+
+
+static int undump_expect_list(struct ip_conntrack *ct,
+			      struct cpt_ip_conntrack_image *ci,
+			      loff_t pos, struct ct_holder *ct_list,
+			      cpt_context_t *ctx)
+{	/* Restore the expectation objects nested in conntrack image @ci at @pos; 0 or -errno. */
+	loff_t end;
+	int err;
+
+	end = pos + ci->cpt_next;
+	pos += ci->cpt_hdrlen;
+	while (pos < end) {
+		struct cpt_ip_connexpect_image v;
+		struct ip_conntrack_expect *exp;
+		struct ip_conntrack *sibling;
+
+		err = rst_get_object(CPT_OBJ_NET_CONNTRACK_EXPECT, pos, &v, ctx);
+		if (err)
+			return err;
+
+		sibling = NULL;
+		if (v.cpt_sibling_conntrack) {
+			struct ct_holder *c;
+
+			for (c = ct_list; c; c = c->next) {
+				if (c->index == v.cpt_sibling_conntrack) {
+					sibling = c->ct;
+					break;
+				}
+			}
+			if (!sibling) {
+				eprintk_ctx("lost sibling of expectation\n");
+				return -EINVAL;
+			}
+		}
+
+		write_lock_bh(&ip_conntrack_lock);
+
+		/* It is possible. Helper module could be just unregistered,
+		 * if expectation were on the list, it would be destroyed. */
+		if (ct->helper == NULL) {
+			write_unlock_bh(&ip_conntrack_lock);
+			dprintk_ctx("conntrack: no helper and non-trivial expectation\n");
+			goto next;	/* skip it, but advance pos or the loop never ends */
+		}
+
+		exp = ip_conntrack_expect_alloc(NULL);
+		if (exp == NULL) {
+			write_unlock_bh(&ip_conntrack_lock);
+			return -ENOMEM;
+		}
+
+		if (ct->helper->timeout && !del_timer(&exp->timeout)) {
+			/* Dying already. We can do nothing. */
+			write_unlock_bh(&ip_conntrack_lock);
+			dprintk_ctx("conntrack expectation is dying\n");
+			goto next;	/* NOTE(review): exp looks leaked on this path - confirm */
+		}
+
+		decode_tuple(&v.cpt_tuple, &exp->tuple, 0);
+		decode_tuple(&v.cpt_mask, &exp->mask, 0);
+
+		exp->master = ct;
+		nf_conntrack_get(&ct->ct_general);
+		ip_conntrack_expect_insert(exp);
+#if 0
+		if (sibling) {
+			exp->sibling = sibling;
+			sibling->master = exp;
+			LIST_DELETE(&ve_ip_conntrack_expect_list, exp);
+			ct->expecting--;
+			nf_conntrack_get(&master_ct(sibling)->infos[0]);
+		} else
+#endif
+		if (ct->helper->timeout) {
+			exp->timeout.expires = jiffies + v.cpt_timeout;
+			add_timer(&exp->timeout);
+		}
+		write_unlock_bh(&ip_conntrack_lock);
+next:
+		pos += v.cpt_next;
+	}
+	return 0;
+}
+
+static int undump_one_ct(struct cpt_ip_conntrack_image *ci, loff_t pos,
+			 struct ct_holder **ct_list, cpt_context_t *ctx)
+{	/* Recreate one conntrack from image @ci (at @pos) and push a holder for it on *@ct_list. */
+	int err = 0;
+	struct ip_conntrack *conntrack;
+	struct ct_holder *c;
+	struct ip_conntrack_tuple orig, repl;
+
+	c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL);
+	if (c == NULL)
+		return -ENOMEM;
+
+	decode_tuple(&ci->cpt_tuple[0], &orig, 0);
+	decode_tuple(&ci->cpt_tuple[1], &repl, 1);
+
+	conntrack = ip_conntrack_alloc(&orig, &repl, get_exec_env()->_ip_conntrack->ub);
+	if (!conntrack || IS_ERR(conntrack)) {
+		kfree(c);
+		return -ENOMEM;	/* NULL and ERR_PTR results are both reported as -ENOMEM */
+	}
+
+	c->ct = conntrack;
+	c->next = *ct_list;	/* push the holder on the head of the caller's list */
+	*ct_list = c;
+	c->index = ci->cpt_index;
+
+	decode_tuple(&ci->cpt_tuple[0], &conntrack->tuplehash[0].tuple, 0);
+	decode_tuple(&ci->cpt_tuple[1], &conntrack->tuplehash[1].tuple, 1);
+
+	conntrack->status = ci->cpt_status;
+
+	memcpy(&conntrack->proto, ci->cpt_proto_data, sizeof(conntrack->proto));
+	memcpy(&conntrack->help, ci->cpt_help_data, sizeof(conntrack->help));	/* NOTE(review): assumes cpt_help_data is at least this large - confirm */
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+	conntrack->nat.masq_index = ci->cpt_masq_index;
+#endif
+	if (ci->cpt_initialized) {	/* NAT sequence data is copied only when flagged in the image */
+		conntrack->nat.info.seq[0].correction_pos = ci->cpt_nat_seq[0].cpt_correction_pos;
+		conntrack->nat.info.seq[0].offset_before = ci->cpt_nat_seq[0].cpt_offset_before;
+		conntrack->nat.info.seq[0].offset_after = ci->cpt_nat_seq[0].cpt_offset_after;
+		conntrack->nat.info.seq[1].correction_pos = ci->cpt_nat_seq[1].cpt_correction_pos;
+		conntrack->nat.info.seq[1].offset_before = ci->cpt_nat_seq[1].cpt_offset_before;
+		conntrack->nat.info.seq[1].offset_after = ci->cpt_nat_seq[1].cpt_offset_after;
+	}
+	if (conntrack->status & IPS_NAT_DONE_MASK)
+		ip_nat_hash_conntrack(conntrack);
+#endif
+
+	if (ci->cpt_ct_helper) {
+		conntrack->helper = ip_conntrack_helper_find_get(&conntrack->tuplehash[1].tuple);
+		if (conntrack->helper == NULL) {
+			eprintk_ctx("conntrack: cannot find helper, some module is not loaded\n");
+			err = -EINVAL;	/* returned only after the conntrack is hashed below */
+		}
+	}
+
+	ip_conntrack_hash_insert(conntrack);
+	conntrack->timeout.expires = jiffies + ci->cpt_timeout;	/* expiry only; add_timer() is not called in this function */
+
+	if (err == 0 && ci->cpt_next > ci->cpt_hdrlen)	/* data follows the header: nested expectations */
+		err = undump_expect_list(conntrack, ci, pos, *ct_list, ctx);
+
+	return err;
+}
+
+int rst_restore_ip_conntrack(struct cpt_context * ctx)
+{	/* Restore all conntracks of the CPT_SECT_NET_CONNTRACK section; 0 or -errno. */
+	int err = 0;
+	loff_t sec = ctx->sections[CPT_SECT_NET_CONNTRACK];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_ip_conntrack_image ci;
+	struct ct_holder *c;
+	struct ct_holder *ct_list = NULL;	/* holders for every conntrack created below */
+
+	if (sec == CPT_NULL)
+		return 0;	/* the image has no conntrack section */
+
+	if (sizeof(ci.cpt_proto_data) != sizeof(union ip_conntrack_proto)) {	/* image layout must match this kernel's union */
+		eprintk_ctx("conntrack module ct->proto version mismatch\n");
+		return -EINVAL;
+	}
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_CONNTRACK || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		err = rst_get_object(CPT_OBJ_NET_CONNTRACK, sec, &ci, ctx);
+		if (err)
+			break;
+		err = undump_one_ct(&ci, sec, &ct_list, ctx);
+		if (err)
+			break;
+		sec += ci.cpt_next;
+	}
+
+	while ((c = ct_list) != NULL) {	/* runs on success and on error alike */
+		ct_list = c->next;
+		if (c->ct)
+			add_timer(&c->ct->timeout);	/* arm the timer whose expiry undump_one_ct() set */
+		kfree(c);
+	}
+
+	return err;
+}
+
+#else
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+int rst_restore_ip_conntrack(struct cpt_context * ctx)
+{
+	/* Built without conntrack support: an image carrying a conntrack
+	 * section is rejected, an image without one needs no work. */
+	return (ctx->sections[CPT_SECT_NET_CONNTRACK] == CPT_NULL) ? 0 : -EINVAL;
+}
+
+#endif
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_context.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_context.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_context.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_context.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,324 @@
+/*
+ *
+ *  kernel/cpt/rst_context.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+static ssize_t file_read(void *addr, size_t count, struct cpt_context *ctx)
+{
+	/* Read exactly @count bytes at the image file's current position. */
+	struct file *img = ctx->file;
+	mm_segment_t saved_fs = get_fs();
+	ssize_t got = -EBADF;	/* result when no image file is attached */
+
+	set_fs(KERNEL_DS);	/* let ->read() accept the kernel-space @addr */
+	if (img != NULL)
+		got = img->f_op->read(img, addr, count, &img->f_pos);
+	set_fs(saved_fs);
+	if (got == count)
+		return 0;
+	return (got < 0) ? got : -EIO;	/* a short read is reported as -EIO */
+}
+
+static ssize_t file_pread(void *addr, size_t count, struct cpt_context *ctx, loff_t pos)
+{
+	/* Read exactly @count bytes at offset @pos; f_pos is left untouched. */
+	struct file *img = ctx->file;
+	mm_segment_t saved_fs = get_fs();
+	ssize_t got = -EBADF;	/* result when no image file is attached */
+
+	set_fs(KERNEL_DS);	/* let ->read() accept the kernel-space @addr */
+	if (img != NULL)
+		got = img->f_op->read(img, addr, count, &pos);
+	set_fs(saved_fs);
+	if (got == count)
+		return 0;
+	return (got < 0) ? got : -EIO;	/* a short read is reported as -EIO */
+}
+
+static void file_align(struct cpt_context *ctx)
+{
+	/* Apply CPT_ALIGN to the image file's current position. */
+	if (ctx->file == NULL)
+		return;	/* no image file attached: nothing to do */
+	ctx->file->f_pos = CPT_ALIGN(ctx->file->f_pos);
+}
+
+int rst_get_section(int type, struct cpt_context *ctx, loff_t *start, loff_t *end)
+{
+	struct cpt_section_hdr hdr;
+	loff_t pos = ctx->sections[type];
+	int err;
+
+	*start = *end = pos;
+	if (pos == CPT_NULL)
+		return 0;	/* section absent: both bounds stay CPT_NULL */
+
+	err = ctx->pread(&hdr, sizeof(hdr), ctx, pos);
+	if (err != 0)
+		return err;
+	if (hdr.cpt_section != type || hdr.cpt_hdrlen < sizeof(hdr))
+		return -EINVAL;
+	*start = pos + hdr.cpt_hdrlen;	/* first byte after the section header */
+	*end = pos + hdr.cpt_next;	/* position just past the section */
+	return 0;
+}
+EXPORT_SYMBOL(rst_get_section);
+
+void rst_context_init(struct cpt_context *ctx)
+{	/* Reset @ctx and install the file-backed read/pread/align callbacks. */
+	int i;
+
+	memset(ctx, 0, sizeof(*ctx));	/* first: every assignment below overrides a zeroed field */
+
+	init_MUTEX(&ctx->main_sem);
+	ctx->refcount = 1;
+
+	ctx->current_section = -1;
+	ctx->current_object = -1;
+	ctx->pagesize = PAGE_SIZE;
+	ctx->read = file_read;
+	ctx->pread = file_pread;
+	ctx->align = file_align;
+	for (i=0; i < CPT_SECT_MAX; i++)
+		ctx->sections[i] = CPT_NULL;	/* no section located yet */
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	init_completion(&ctx->pgin_notify);
+#endif
+	cpt_object_init(ctx);
+}
+
+static int parse_sections(loff_t start, loff_t end, cpt_context_t *ctx)
+{
+	struct cpt_section_hdr hdr;
+	loff_t pos;
+	int err;
+
+	/* Walk the chained section headers, recording where each one starts. */
+	for (pos = start; pos < end; pos += hdr.cpt_next) {
+		err = ctx->pread(&hdr, sizeof(hdr), ctx, pos);
+		if (err)
+			return err;
+		if (hdr.cpt_hdrlen < sizeof(hdr))
+			return -EINVAL;	/* header shorter than its fixed part */
+		if (hdr.cpt_next < hdr.cpt_hdrlen || pos + hdr.cpt_next > end)
+			return -EINVAL;	/* length inconsistent or past @end */
+		if (hdr.cpt_section >= CPT_SECT_MAX)
+			return -EINVAL;	/* section index out of range */
+		ctx->sections[hdr.cpt_section] = pos;
+	}
+	return 0;
+}
+
+int rst_open_dumpfile(struct cpt_context *ctx)
+{	/* Validate the image in ctx->file (header, tail, sections) and load its global parameters into @ctx. */
+	int err;
+	struct cpt_major_tail *v;
+	struct cpt_major_hdr  h;
+	unsigned long size;
+
+	err = -EBADF;
+	if (!ctx->file)
+		goto err_out;
+
+	err = -ENOMEM;
+	ctx->tmpbuf = (char*)__get_free_page(GFP_KERNEL);	/* one-page scratch buffer */
+	if (ctx->tmpbuf == NULL)
+		goto err_out;
+	__cpt_release_buf(ctx);
+
+	size = ctx->file->f_dentry->d_inode->i_size;	/* NOTE(review): i_size narrowed to unsigned long - confirm for huge images on 32-bit */
+
+	if (size & 7) {	/* image length must be a multiple of 8 */
+		err = -EINVAL;
+		goto err_out;
+	}
+	if (size < sizeof(struct cpt_major_hdr) +
+	    sizeof(struct cpt_major_tail)) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	err = ctx->pread(&h, sizeof(h), ctx, 0);	/* major header sits at offset 0 */
+	if (err) {
+		eprintk_ctx("too short image 1 %d\n", err);
+		goto err_out;
+	}
+	if (h.cpt_signature[0] != CPT_SIGNATURE0 ||
+	    h.cpt_signature[1] != CPT_SIGNATURE1 ||
+	    h.cpt_signature[2] != CPT_SIGNATURE2 ||
+	    h.cpt_signature[3] != CPT_SIGNATURE3) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	if (h.cpt_hz != HZ) {	/* image must come from a kernel with the same HZ */
+		err = -EINVAL;
+		eprintk_ctx("HZ mismatch: %d != %d\n", h.cpt_hz, HZ);
+		goto err_out;
+	}
+	ctx->virt_jiffies64 = h.cpt_start_jiffies64;
+	ctx->start_time.tv_sec = h.cpt_start_sec;
+	ctx->start_time.tv_nsec = h.cpt_start_nsec;
+	ctx->kernel_config_flags = h.cpt_kernel_config[0];
+	ctx->iptables_mask = h.cpt_iptables_mask;
+	if (h.cpt_image_version > CPT_VERSION_18 ||
+			CPT_VERSION_MINOR(h.cpt_image_version) > 1) {
+		eprintk_ctx("Unknown image version: %x. Can't restore.\n",
+				h.cpt_image_version);
+		err = -EINVAL;
+		goto err_out;
+	}
+	ctx->image_version = h.cpt_image_version;
+	ctx->features = (__u64)((__u64)h.cpt_ve_features2<<32 | h.cpt_ve_features);	/* features2 forms the upper 32 bits */
+	ctx->image_arch = h.cpt_os_arch;
+
+	v = cpt_get_buf(ctx);
+	err = ctx->pread(v, sizeof(*v), ctx, size - sizeof(*v));	/* major tail occupies the end of the file */
+	if (err) {
+		eprintk_ctx("too short image 2 %d\n", err);
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+	if (v->cpt_signature[0] != CPT_SIGNATURE0 ||
+	    v->cpt_signature[1] != CPT_SIGNATURE1 ||
+	    v->cpt_signature[2] != CPT_SIGNATURE2 ||
+	    v->cpt_signature[3] != CPT_SIGNATURE3 ||
+	    v->cpt_nsect != CPT_SECT_MAX_INDEX) {
+		err = -EINVAL;
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+	if ((err = parse_sections(h.cpt_hdrlen, size - sizeof(*v) - sizeof(struct cpt_section_hdr), ctx)) < 0) {	/* sections lie between the header and the tail */
+		cpt_release_buf(ctx);
+		goto err_out;
+	}
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	ctx->lazypages = v->cpt_lazypages;
+#endif
+	ctx->tasks64 = v->cpt_64bit;
+	cpt_release_buf(ctx);
+	return 0;	/* success: ctx->tmpbuf stays allocated */
+
+err_out:
+	if (ctx->tmpbuf) {	/* failure: give the scratch page back */
+		free_page((unsigned long)ctx->tmpbuf);
+		ctx->tmpbuf = NULL;
+	}
+	return err;
+}
+
+void rst_close_dumpfile(struct cpt_context *ctx)
+{	/* Release the image file reference and the scratch page, whichever are held. */
+	if (ctx->file) {
+		fput(ctx->file);
+		ctx->file = NULL;	/* cleared so a repeated call is harmless */
+	}
+	if (ctx->tmpbuf) {
+		free_page((unsigned long)ctx->tmpbuf);
+		ctx->tmpbuf = NULL;
+	}
+}
+
+int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx)
+{	/* Read up to @size bytes of the object header at @pos into @tmp; 0 or -errno. */
+	int err;
+	struct cpt_object_hdr *hdr = tmp;
+	if (size < (int)sizeof(*hdr))	/* check before reading: @tmp must hold the common header */
+		return -EINVAL;
+	err = ctx->pread(hdr, sizeof(struct cpt_object_hdr), ctx, pos);
+	if (err)
+		return err;
+	if (type > 0 && type != hdr->cpt_object)	/* @type <= 0 accepts any object type */
+		return -EINVAL;
+	if (hdr->cpt_hdrlen > hdr->cpt_next)
+		return -EINVAL;
+	if (hdr->cpt_hdrlen < sizeof(struct cpt_object_hdr))
+		return -EINVAL;
+	if (size > hdr->cpt_hdrlen)
+		size = hdr->cpt_hdrlen;	/* never read beyond the header stored in the image */
+	if (size > sizeof(*hdr))
+		err = ctx->pread(hdr+1, size - sizeof(*hdr),
+				 ctx, pos + sizeof(*hdr));
+	return err;
+}
+EXPORT_SYMBOL(_rst_get_object);
+
+void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx)
+{	/* Return a kmalloc'ed copy of the cpt_hdrlen-byte header of the object at @pos, or NULL. */
+	int err;
+	void *tmp;
+	struct cpt_object_hdr hdr;
+	err = ctx->pread(&hdr, sizeof(hdr), ctx, pos);
+	if (err)
+		return NULL;
+	if (type > 0 && type != hdr.cpt_object)	/* @type <= 0 accepts any object type */
+		return NULL;
+	if (hdr.cpt_hdrlen > hdr.cpt_next)
+		return NULL;
+	if (hdr.cpt_hdrlen < sizeof(struct cpt_object_hdr))
+		return NULL;
+	tmp = kmalloc(hdr.cpt_hdrlen, GFP_KERNEL);
+	if (!tmp)
+		return NULL;
+	err = ctx->pread(tmp, hdr.cpt_hdrlen, ctx, pos);	/* re-read from @pos, common header included */
+	if (!err)
+		return tmp;
+	kfree(tmp);
+	return NULL;	/* every failure, not only allocation, yields NULL */
+}
+EXPORT_SYMBOL(__rst_get_object);
+
+__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx)
+{	/* Read the CPT_OBJ_NAME payload at *@pos_p into a fresh page and advance *@pos_p past the object. */
+	int err;
+	struct cpt_object_hdr hdr;
+	__u8 *name;
+
+	err = rst_get_object(CPT_OBJ_NAME, *pos_p, &hdr, ctx);
+	if (err)
+		return NULL;
+	if (hdr.cpt_next - hdr.cpt_hdrlen > PAGE_SIZE)	/* payload must fit in one page */
+		return NULL;
+	name = (void*)__get_free_page(GFP_KERNEL);
+	if (!name)
+		return NULL;
+	err = ctx->pread(name, hdr.cpt_next - hdr.cpt_hdrlen,
+		   ctx, *pos_p + hdr.cpt_hdrlen);
+	if (err) {
+		free_page((unsigned long)name);
+		return NULL;
+	}
+	*pos_p += hdr.cpt_next;	/* advanced only on success */
+	return name;	/* a whole page; rst_put_name() frees it */
+}
+
+__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx)
+{	/* Like __rst_get_name(), but the advanced position is discarded. */
+	return __rst_get_name(&pos, ctx);
+}
+
+void rst_put_name(__u8 *name, struct cpt_context *ctx)
+{
+	/* Free the page containing @name; @name may point inside the page. */
+	if (name == NULL)
+		return;
+	free_page((unsigned long)name & ~(PAGE_SIZE-1));
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_epoll.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_epoll.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_epoll.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_epoll.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,173 @@
+/*
+ *
+ *  kernel/cpt/rst_epoll.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/eventpoll.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+/* These functions are static in fs/eventpoll.c */
+extern struct file_operations eventpoll_fops;
+extern int ep_insert(struct eventpoll *ep, struct epoll_event *event,
+		     struct file *tfile, int fd);
+extern struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
+extern void ep_release_epitem(struct epitem *epi);
+
+
+struct file *cpt_open_epolldev(struct cpt_file_image *fi,
+			       unsigned flags,
+			       struct cpt_context *ctx)
+{	/* Create a new epoll file; @fi, @flags and @ctx are not used here. */
+	struct file *file;
+	int efd;
+
+	/* Argument "size" is ignored, use just 1 */
+	efd = sys_epoll_create(1);
+	if (efd < 0)
+		return ERR_PTR(efd);
+
+	file = fget(efd);	/* take our own reference to the file ... */
+	sys_close(efd);	/* ... then give the descriptor back */
+	return file;	/* NOTE(review): NULL rather than ERR_PTR if fget() failed - confirm callers cope */
+}
+
+static int restore_one_epoll(cpt_object_t *obj,
+			     loff_t pos,
+			     struct cpt_epoll_image *ebuf,
+			     cpt_context_t *ctx)
+{	/* Re-add to the epoll file in @obj every CPT_OBJ_EPOLL_FILE entry nested in image @ebuf at @pos. */
+	int err = 0;
+	loff_t endpos;
+	struct file *file = obj->o_obj;
+	struct eventpoll *ep;
+
+	if (file->f_op != &eventpoll_fops) {	/* must really be an epoll file */
+		eprintk_ctx("bad epoll file\n");
+		return -EINVAL;
+	}
+
+	ep = file->private_data;
+
+	if (unlikely(ep == NULL)) {
+		eprintk_ctx("bad epoll device\n");
+		return -EINVAL;
+	}
+
+	endpos = pos + ebuf->cpt_next;
+	pos += ebuf->cpt_hdrlen;	/* nested entries start right after the header */
+	while (pos < endpos) {
+		struct cpt_epoll_file_image efi;
+		struct epoll_event epds;
+
+		cpt_object_t *tobj;
+
+		err = rst_get_object(CPT_OBJ_EPOLL_FILE, pos, &efi, ctx);
+		if (err)
+			return err;
+		tobj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, efi.cpt_file, ctx);	/* the watched file, already restored */
+		if (!tobj) {
+			eprintk_ctx("epoll file not found\n");
+			return -EINVAL;
+		}
+		epds.events = efi.cpt_events;
+		epds.data = efi.cpt_data;
+		down_write(&ep->sem);
+		err = ep_insert(ep, &epds, tobj->o_obj, efi.cpt_fd);
+		if (!err) {
+			struct epitem *epi;
+			epi = ep_find(ep, tobj->o_obj, efi.cpt_fd);	/* look up the item just inserted */
+			if (epi) {
+				epi->revents = efi.cpt_revents;
+				if (efi.cpt_ready) {	/* saved as ready: queue it on the ready list if not there */
+					unsigned long flags;
+					write_lock_irqsave(&ep->lock, flags);
+					if (list_empty(&epi->rdllink))
+						list_add_tail(&epi->rdllink, &ep->rdllist);
+					write_unlock_irqrestore(&ep->lock, flags);
+				}
+				ep_release_epitem(epi);	/* presumably drops the reference ep_find() took - confirm */
+			}
+		}
+		up_write(&ep->sem);
+		if (err)
+			break;
+		pos += efi.cpt_next;
+	}
+	return err;
+}
+
+int rst_eventpoll(cpt_context_t *ctx)
+{	/* Restore every CPT_OBJ_EPOLL object of the CPT_SECT_EPOLL section; 0 or -errno. */
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_EPOLL];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;	/* the image has no epoll section */
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_EPOLL || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		cpt_object_t *obj;
+		struct cpt_epoll_image *ebuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_EPOLL, sec, ebuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, ebuf->cpt_file, ctx);
+		if (obj == NULL) {
+			eprintk_ctx("cannot find epoll file object\n");
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		err = restore_one_epoll(obj, sec, ebuf, ctx);
+		sec += ebuf->cpt_next;	/* use ebuf before it is released, not after */
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_files.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_files.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_files.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_files.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,1606 @@
+/*
+ *
+ *  kernel/cpt/rst_files.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/mount.h>
+#include <linux/tty.h>
+#include <linux/namei.h>
+#include <linux/vmalloc.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <asm/uaccess.h>
+#include <ub/ub_mem.h>
+#include <linux/cpt_image.h>
+#include <linux/namespace.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+
+#include "cpt_syscalls.h"
+
+
+struct filejob {	/* one queued job, see rst_filejob_queue() */
+	struct filejob *next;	/* next job in the singly linked queue */
+	int	pid;	/* pid of the task that queued the job */
+	loff_t	fdi;	/* image position passed when the job was queued */
+};
+
+static int rst_filejob_queue(loff_t pos, cpt_context_t *ctx)
+{
+	/* Record (current pid, @pos) as a new job at the head of the queue. */
+	struct filejob *job = kmalloc(sizeof(struct filejob), GFP_KERNEL);
+
+	if (!job)
+		return -ENOMEM;
+	job->next = ctx->filejob_queue;
+	job->fdi = pos;
+	job->pid = current->pid;
+	ctx->filejob_queue = job;
+	return 0;
+}
+
+static void _anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	/* Keep one spare page cached in the pipe; free the page otherwise. */
+	if (info->tmp_page == NULL)
+		info->tmp_page = buf->page;
+	else
+		__free_page(buf->page);
+
+	/* Each buffer using these ops holds one reference on this module. */
+	module_put(THIS_MODULE);
+}
+
+static void *_anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
+{	/* Map the buffer's page with kmap(); @file and @info are unused. */
+	return kmap(buf->page);
+}
+
+static void _anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
+{	/* Undo _anon_pipe_buf_map(): kunmap() the buffer's page. */
+	kunmap(buf->page);
+}
+
+static struct pipe_buf_operations _anon_pipe_buf_ops = {	/* ops installed on buffers filled by fixup_pipe_data() */
+	.can_merge = 1,
+	.map = _anon_pipe_buf_map,
+	.unmap = _anon_pipe_buf_unmap,
+	.release = _anon_pipe_buf_release,	/* also drops the module reference */
+};
+
+/* Sorta ugly... Multiple readers/writers of a named pipe would rewrite the
+ * buffer many times; we need to mark it in CPT_OBJ_INODE table in some way.
+ * Loads the saved pipe data (CPT_OBJ_BITS after the inode image) into @file. */
+static int fixup_pipe_data(struct file *file, struct cpt_file_image *fi,
+			   struct cpt_context *ctx)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	struct cpt_inode_image ii;
+	struct cpt_obj_bits b;
+	struct pipe_inode_info *info;
+	int err;
+	int count;	/* bytes of pipe data restored so far */
+
+	if (!S_ISFIFO(ino->i_mode)) {
+		eprintk_ctx("fixup_pipe_data: not a pipe %Ld\n", (long long)fi->cpt_inode);
+		return -EINVAL;
+	}
+	if (fi->cpt_inode == CPT_NULL)
+		return 0;
+
+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
+	if (err)
+		return err;
+
+	if (ii.cpt_next <= ii.cpt_hdrlen)
+		return 0;	/* nothing follows the inode header: no saved data */
+
+	err = rst_get_object(CPT_OBJ_BITS, fi->cpt_inode + ii.cpt_hdrlen, &b, ctx);
+	if (err)
+		return err;
+
+	if (b.cpt_size == 0)
+		return 0;
+
+	mutex_lock(PIPE_MUTEX(*ino));
+	info = ino->i_pipe;
+	if (info->nrbufs) {
+		mutex_unlock(PIPE_MUTEX(*ino));
+		eprintk("pipe buffer is restored already\n");
+		return -EINVAL;
+	}
+	info->curbuf = 0;
+	count = 0;
+	while (count < b.cpt_size) {
+		struct pipe_buffer *buf = info->bufs + info->nrbufs;
+		int chars = min_t(int, b.cpt_size - count, PAGE_SIZE);
+
+		if (info->nrbufs >= PIPE_BUFFERS) {	/* bufs[] is full: reject oversized data */
+			err = -EINVAL;
+			break;
+		}
+		if (!try_module_get(THIS_MODULE)) {
+			err = -EBUSY;
+			break;
+		}
+		buf->page = alloc_page(GFP_HIGHUSER);
+		if (buf->page == NULL) {
+			module_put(THIS_MODULE);	/* buffer never published: drop its reference */
+			err = -ENOMEM;
+			break;
+		}
+		buf->ops = &_anon_pipe_buf_ops;
+		buf->offset = 0;
+		buf->len = chars;
+		info->nrbufs++;
+		err = ctx->pread(kmap(buf->page), chars, ctx,
+				 fi->cpt_inode + ii.cpt_hdrlen + b.cpt_hdrlen + count);
+		kunmap(buf->page);	/* balance the kmap() above; page may be highmem */
+		if (err)
+			break;
+		count += chars;
+	}
+	mutex_unlock(PIPE_MUTEX(*ino));
+
+	return err;
+}
+
+static int make_flags(struct cpt_file_image *fi)
+{
+	/* Build the open flags used to re-open the file described by @fi. */
+	int rw = fi->cpt_mode & (FMODE_READ|FMODE_WRITE);
+	int flags = O_NOFOLLOW|O_NONBLOCK|O_NOCTTY;
+
+	if (rw == (FMODE_READ|FMODE_WRITE))
+		flags |= O_RDWR;
+	else if (rw == FMODE_WRITE)
+		flags |= O_WRONLY;
+	else if (rw == FMODE_READ)
+		flags |= O_RDONLY;
+	/* Keep the saved flags except access mode, creation bits and FASYNC. */
+	flags |= fi->cpt_flags & ~(O_ACCMODE|O_CREAT|O_TRUNC|O_EXCL|FASYNC);
+	return flags;
+}
+
+static struct file *open_pipe(char *name,
+			      struct cpt_file_image *fi,
+			      unsigned flags,
+			      struct cpt_context *ctx)
+{	/* Open the pipe/FIFO described by @fi; returns the file or an ERR_PTR. */
+	int err;
+	cpt_object_t *obj;
+	struct cpt_inode_image ii;
+	struct file *rf, *wf;	/* wf: returned on the common path; rf: other side, kept in the object table */
+
+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
+	if (err)
+		return ERR_PTR(err);
+
+	if (ii.cpt_sb == FSMAGIC_PIPEFS) {
+		int pfd[2];
+
+		if ((err = sc_pipe(pfd)) < 0)
+			return ERR_PTR(err);
+
+		rf = fcheck(pfd[0]);
+		wf = fcheck(pfd[1]);
+		get_file(rf);
+		get_file(wf);
+		sc_close(pfd[0]);
+		sc_close(pfd[1]);
+		if (fi->cpt_mode&FMODE_READ) {	/* the read end is the one to return */
+			struct file *tf = wf; wf = rf; rf = tf;
+		}
+	} else {
+		if (fi->cpt_mode&FMODE_READ) {
+			rf = filp_open(name, flags, 0);
+			if (IS_ERR(rf)) {
+				dprintk_ctx("filp_open\n");
+				return rf;
+			}
+			dprintk_ctx(CPT_FID "open RDONLY fifo ino %Ld %p %x\n", CPT_TID(current),
+				    (long long)fi->cpt_inode, rf, rf->f_dentry->d_inode->i_mode);
+			return rf;
+		}
+		dprintk_ctx(CPT_FID "open WRONLY fifo ino %Ld\n", CPT_TID(current), (long long)fi->cpt_inode);
+		rf = filp_open(name, O_RDWR|O_NONBLOCK, 0);
+		if (IS_ERR(rf))
+			return rf;
+		wf = dentry_open(dget(rf->f_dentry),
+				 mntget(rf->f_vfsmnt), flags);
+		if (IS_ERR(wf)) {	/* on failure dentry_open() has already dropped the refs taken above */
+			fput(rf);
+			return wf;
+		}
+	}
+
+	/* Add pipe inode to obj table. */
+	obj = cpt_object_add(CPT_OBJ_INODE, wf->f_dentry->d_inode, ctx);
+	if (obj == NULL) {
+		fput(rf); fput(wf);
+		return ERR_PTR(-ENOMEM);
+	}
+	cpt_obj_setpos(obj, fi->cpt_inode, ctx);
+	obj->o_parent = rf;
+
+	/* Add another side of pipe to obj table, it will not be used
+	 * (o_pos = CPT_NULL), other processes opening the pipe will find
+	 * inode and open it with dentry_open(). */
+	obj = cpt_object_add(CPT_OBJ_FILE, rf, ctx);
+	if (obj == NULL) {
+		fput(wf);
+		return ERR_PTR(-ENOMEM);
+	}
+	return wf;
+}
+
+/* Reopen a checkpointed special file.
+ * Returns NULL when the caller should open the file itself (directories,
+ * FIFOs and MEM_MAJOR character devices), ERR_PTR(-EINVAL) for block
+ * devices, sockets and other unsupported types, and otherwise whatever
+ * rst_open_tty() returns.  @deleted is not used. */
+static struct file *open_special(struct cpt_file_image *fi,
+				 unsigned flags,
+				 int deleted,
+				 struct cpt_context *ctx)
+{
+	struct file *res = NULL;
+	struct cpt_inode_image *inode_img;
+
+	switch (fi->cpt_i_mode & S_IFMT) {
+	case S_IFDIR:
+	case S_IFIFO:
+		/* Directories and named pipes are not special actually */
+		return NULL;
+	case S_IFSOCK:
+		eprintk_ctx("bug: socket is not open\n");
+		return ERR_PTR(-EINVAL);
+	case S_IFCHR:
+		/* Support only (some) character devices at the moment. */
+		break;
+	default:
+		/* Block devices and anything else are not supported. */
+		return ERR_PTR(-EINVAL);
+	}
+
+	inode_img = __rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, ctx);
+	if (inode_img == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Do not worry about this right now. /dev/null,zero,*random are here.
+	 * To prohibit at least /dev/mem?
+	 */
+	if (MAJOR(inode_img->cpt_rdev) != MEM_MAJOR)
+		res = rst_open_tty(fi, inode_img, flags, ctx);
+	kfree(inode_img);
+	return res;
+}
+
+/* Re-apply one POSIX lock record @fli to @file; the owner is looked up by
+ * index among CPT_OBJ_FILES objects.  Returns 0 or a negative errno. */
+static int restore_posix_lock(struct file *file, struct cpt_flock_image *fli, cpt_context_t *ctx)
+{
+	struct file_lock fl;
+	cpt_object_t *owner;
+	owner = lookup_cpt_obj_byindex(CPT_OBJ_FILES, fli->cpt_owner, ctx);
+	if (owner == NULL) {
+		eprintk_ctx("unknown lock owner %d\n", (int)fli->cpt_owner);
+		return -EINVAL;
+	}
+	memset(&fl, 0, sizeof(fl));
+	fl.fl_file = file;
+	fl.fl_owner = owner->o_obj;
+	fl.fl_type = fli->cpt_type;
+	fl.fl_flags = fli->cpt_flags & ~FL_SLEEP;
+	fl.fl_start = fli->cpt_start;
+	fl.fl_end = fli->cpt_end;
+	fl.fl_pid = vpid_to_pid(fli->cpt_pid);
+	if (fl.fl_pid < 0) {
+		eprintk_ctx("unknown lock pid %d\n", fl.fl_pid);
+		return -EINVAL;
+	}
+	if (fl.fl_owner == NULL)
+		eprintk_ctx("no lock owner\n");
+	return posix_lock_file(file, &fl);
+}
+
+/* Re-take a BSD flock() lock on @file.  The file is temporarily installed
+ * into a free descriptor of the current task so that sc_flock() can be used. */
+static int restore_flock(struct file *file, struct cpt_flock_image *fli,
+			 cpt_context_t *ctx)
+{
+	int op, ret;
+	int fd = get_unused_fd();
+
+	if (fd < 0) {
+		eprintk_ctx("BSD flock cannot be restored\n");
+		return fd;
+	}
+	get_file(file);
+	fd_install(fd, file);
+
+	if (fli->cpt_type != F_RDLCK && fli->cpt_type != F_WRLCK) {
+		eprintk_ctx("flock flavor is unknown: %u\n", fli->cpt_type);
+		ret = -EINVAL;
+	} else {
+		op = (fli->cpt_type == F_RDLCK) ? LOCK_SH : LOCK_EX;
+		ret = sc_flock(fd, LOCK_NB | op);
+	}
+	sc_close(fd);
+	return ret;
+}
+
+
+/* Walk the sub-objects that follow file image @fi (located at @pos) and
+ * restore every CPT_OBJ_FLOCK record flagged FL_POSIX onto @file.
+ * Stops at, and returns, the first error; returns 0 otherwise. */
+static int fixup_posix_locks(struct file *file,
+			     struct cpt_file_image *fi,
+			     loff_t pos, struct cpt_context *ctx)
+{
+	struct cpt_flock_image rec;
+	const loff_t limit = pos + fi->cpt_next;
+	loff_t cur;
+
+	for (cur = pos + fi->cpt_hdrlen; cur < limit; cur += rec.cpt_next) {
+		int rc = rst_get_object(-1, cur, &rec, ctx);
+
+		if (rc)
+			return rc;
+		if (rec.cpt_object != CPT_OBJ_FLOCK || !(rec.cpt_flags&FL_POSIX))
+			continue;
+		rc = restore_posix_lock(file, &rec, ctx);
+		if (rc)
+			return rc;
+		dprintk_ctx("posix lock restored\n");
+	}
+	return 0;
+}
+
+/* Restore POSIX locks for every file object that has an image position.
+ * The result of restoring the locks of an individual file is ignored; only
+ * a failure to read a file image aborts the walk. */
+int rst_posix_locks(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct cpt_file_image img;
+		int rc;
+		if (obj->o_pos == CPT_NULL)
+			continue;
+		rc = rst_get_object(CPT_OBJ_FILE, obj->o_pos, &img, ctx);
+		if (rc < 0)
+			return rc;
+		if (img.cpt_next > img.cpt_hdrlen)
+			fixup_posix_locks(obj->o_obj, &img, obj->o_pos, ctx);
+	}
+	return 0;
+}
+
+/* Walk the sub-objects that follow file image @fi (located at @pos) and
+ * restore every CPT_OBJ_FLOCK record flagged FL_FLOCK onto @file.
+ * Stops at, and returns, the first error; returns 0 otherwise. */
+static int fixup_flocks(struct file *file,
+			struct cpt_file_image *fi,
+			loff_t pos, struct cpt_context *ctx)
+{
+	struct cpt_flock_image rec;
+	const loff_t limit = pos + fi->cpt_next;
+	loff_t cur;
+
+	for (cur = pos + fi->cpt_hdrlen; cur < limit; cur += rec.cpt_next) {
+		int rc = rst_get_object(-1, cur, &rec, ctx);
+
+		if (rc)
+			return rc;
+		if (rec.cpt_object != CPT_OBJ_FLOCK || !(rec.cpt_flags&FL_FLOCK))
+			continue;
+		rc = restore_flock(file, &rec, ctx);
+		if (rc)
+			return rc;
+		dprintk_ctx("bsd lock restored\n");
+	}
+	return 0;
+}
+
+/* Replay the CPT_OBJ_PAGES blocks in image range [pos, end) into @file; returns 0 or -errno. */
+static int fixup_reg_data(struct file *file, loff_t pos, loff_t end,
+			  struct cpt_context *ctx)
+{
+	int err;
+	struct cpt_page_block pgb;
+	ssize_t (*do_write)(struct file *, const char __user *, size_t, loff_t *ppos);
+	/* Data is stored through the file's own ->write() method. */
+	do_write = file->f_op->write;
+	if (do_write == NULL) {
+		eprintk_ctx("no write method. Cannot restore contents of the file.\n");
+		return -EINVAL;
+	}
+	/* Extra reference; dropped at "out" or when the file is reopened below. */
+	atomic_inc(&file->f_count);
+	/* One iteration per page block: header at pos, payload right behind it. */
+	while (pos < end) {
+		loff_t opos;
+		loff_t ipos;
+		int count;
+
+		err = rst_get_object(CPT_OBJ_PAGES, pos, &pgb, ctx);
+		if (err)
+			goto out;
+		dprintk_ctx("restoring file data block: %08x-%08x\n",
+		       (__u32)pgb.cpt_start, (__u32)pgb.cpt_end);
+		ipos = pos + pgb.cpt_hdrlen;	/* image offset of the payload */
+		opos = pgb.cpt_start;		/* destination offset in the file */
+		count = pgb.cpt_end-pgb.cpt_start;
+		while (count > 0) {
+			mm_segment_t oldfs;
+			int copy = count;
+			/* Move at most PAGE_SIZE bytes per pass through ctx->tmpbuf. */
+			if (copy > PAGE_SIZE)
+				copy = PAGE_SIZE;
+			(void)cpt_get_buf(ctx);
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos);
+			set_fs(oldfs);
+			if (err) {
+				__cpt_release_buf(ctx);
+				goto out;
+			}
+			if (!(file->f_mode & FMODE_WRITE) ||
+			    (file->f_flags&O_DIRECT)) {	/* swap our ref for an O_WRONLY reopen */
+				fput(file);
+				file = dentry_open(dget(file->f_dentry),
+						   mntget(file->f_vfsmnt), O_WRONLY);
+				if (IS_ERR(file)) {
+					__cpt_release_buf(ctx);
+					return PTR_ERR(file);	/* extra ref was already dropped */
+				}
+			}
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			ipos += copy;
+			err = do_write(file, ctx->tmpbuf, copy, &opos);
+			set_fs(oldfs);
+			__cpt_release_buf(ctx);
+			if (err != copy) {
+				if (err >= 0)
+					err = -EIO;	/* short write */
+				goto out;
+			}
+			count -= copy;
+		}
+		pos += pgb.cpt_next;
+	}
+	err = 0;
+
+out:
+	fput(file);
+	return err;
+}
+
+/* For a regular file: restore saved contents (if any), then size, times, owner and mode. */
+static int fixup_file_content(struct file **file_p, struct cpt_file_image *fi,
+			      struct cpt_inode_image *ii,
+			      struct cpt_context *ctx)
+{
+	int err;
+	struct file *file = *file_p;
+	struct iattr newattrs;
+	/* Nothing to do for anything but regular files; returns 0 or -errno. */
+	if (!S_ISREG(fi->cpt_i_mode))
+		return 0;
+	/* No file yet: create a shmem file of the recorded size and hand it back. */
+	if (file == NULL) {
+		file = shmem_file_setup("dev/zero", ii->cpt_size, 0);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+		*file_p = file;
+	}
+	/* Objects follow the inode header; page blocks there hold the file data. */
+	if (ii->cpt_next > ii->cpt_hdrlen) {
+		struct cpt_object_hdr hdr;
+		err = ctx->pread(&hdr, sizeof(struct cpt_object_hdr), ctx, fi->cpt_inode+ii->cpt_hdrlen);
+		if (err)
+			return err;
+		if (hdr.cpt_object == CPT_OBJ_PAGES) {
+			err = fixup_reg_data(file, fi->cpt_inode+ii->cpt_hdrlen,
+					fi->cpt_inode+ii->cpt_next, ctx);
+			if (err)
+				return err;
+		}
+	}
+	/* Both notify_change() calls below run with the inode's i_mutex held. */
+	mutex_lock(&file->f_dentry->d_inode->i_mutex);
+	/* stage 1 - update size like do_truncate does */
+	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+	newattrs.ia_size = ii->cpt_size;
+	cpt_timespec_import(&newattrs.ia_ctime, ii->cpt_ctime);
+	err = notify_change(file->f_dentry, &newattrs);
+	if (err)
+		goto out;
+
+	/* stage 2 - update times, owner and mode */
+	newattrs.ia_valid = ATTR_MTIME | ATTR_ATIME |
+		ATTR_ATIME_SET | ATTR_MTIME_SET |
+		ATTR_MODE | ATTR_UID | ATTR_GID;
+	newattrs.ia_uid = ii->cpt_uid;
+	newattrs.ia_gid = ii->cpt_gid;
+	newattrs.ia_mode = file->f_dentry->d_inode->i_mode & S_IFMT;	/* keep the real file type */
+	newattrs.ia_mode |= (ii->cpt_mode & ~S_IFMT);
+	cpt_timespec_import(&newattrs.ia_atime, ii->cpt_atime);
+	cpt_timespec_import(&newattrs.ia_mtime, ii->cpt_mtime);
+	err = notify_change(file->f_dentry, &newattrs);
+
+out:
+	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+	return err;
+}
+
+/* Bring @file in line with image @fi: position, owner ids, f_owner, f_mode
+ * and f_flags.  @pos is only used in diagnostics.  Returns 0, or -EINVAL if
+ * the SIGIO owner is gone, FASYNC cannot be set up or the flags still differ. */
+static int fixup_file_flags(struct file *file, struct cpt_file_image *fi,
+			    int was_dentry_open, loff_t pos, cpt_context_t *ctx)
+{
+	if (fi->cpt_pos != file->f_pos) {
+		loff_t res = -ESPIPE;	/* llseek() result may not fit an int */
+		if (file->f_op && file->f_op->llseek)
+			res = file->f_op->llseek(file, fi->cpt_pos, 0);
+		if (res < 0) {
+			dprintk_ctx("file %Ld lseek %Ld - %Ld\n", (long long)pos,
+				    (long long)file->f_pos, (long long)fi->cpt_pos);
+			file->f_pos = fi->cpt_pos;
+		}
+	}
+	file->f_uid = fi->cpt_uid;
+	file->f_gid = fi->cpt_gid;
+	file->f_owner.pid = 0;
+	if (fi->cpt_fown_pid) {
+		file->f_owner.pid = comb_vpid_to_pid(fi->cpt_fown_pid);
+		if (file->f_owner.pid == 0) {	/* report the image pid, not 0 */
+			wprintk_ctx("fixup_file_flags: owner %d does not exist anymore\n", (int)fi->cpt_fown_pid);
+			return -EINVAL;
+		}
+	}
+	file->f_owner.uid = fi->cpt_fown_uid;
+	file->f_owner.euid = fi->cpt_fown_euid;
+	file->f_owner.signum = fi->cpt_fown_signo;
+
+	if (file->f_mode != fi->cpt_mode) {
+		if (was_dentry_open &&
+		    ((file->f_mode^fi->cpt_mode)&(FMODE_PREAD|FMODE_LSEEK))) {
+			file->f_mode &= ~(FMODE_PREAD|FMODE_LSEEK);
+			file->f_mode |= fi->cpt_mode&(FMODE_PREAD|FMODE_LSEEK);
+		}
+		if (file->f_mode != fi->cpt_mode)
+			wprintk_ctx("file %ld mode mismatch %08x %08x\n", (long)pos, file->f_mode, fi->cpt_mode);
+	}
+	if (file->f_flags != fi->cpt_flags) {
+		if (!(fi->cpt_flags&O_NOFOLLOW))
+			file->f_flags &= ~O_NOFOLLOW;
+		if ((file->f_flags^fi->cpt_flags)&O_NONBLOCK) {
+			file->f_flags &= ~O_NONBLOCK;
+			file->f_flags |= fi->cpt_flags&O_NONBLOCK;
+		}
+		if (fi->cpt_flags&FASYNC) {
+			if (fi->cpt_fown_fd == -1) {
+				wprintk_ctx("No fd for FASYNC\n");
+				return -EINVAL;
+			}
+			if (file->f_op && file->f_op->fasync) {
+				if (file->f_op->fasync(fi->cpt_fown_fd, file, 1) < 0) {
+					wprintk_ctx("FASYNC problem\n");
+					return -EINVAL;
+				}
+				file->f_flags |= FASYNC;
+			}
+		}
+		if (file->f_flags != fi->cpt_flags) {
+			eprintk_ctx("file %ld flags mismatch %08x %08x\n", (long)pos, file->f_flags, fi->cpt_flags);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+/* Recreate a file that was unlinked at checkpoint time: create it, open it, unlink it again. */
+static struct file *
+open_deleted(char *name, unsigned flags, struct cpt_file_image *fi,
+	     struct cpt_inode_image *ii, cpt_context_t *ctx)
+{
+	struct file * file;
+	char *suffix = NULL;
+	int attempt = 0;
+	int tmp_pass = 0;
+	mode_t mode = fi->cpt_i_mode;
+	/* @name is edited in place; returns the open file or an ERR_PTR(). */
+	/* Strip (deleted) part... */
+	if (strlen(name) > strlen(" (deleted)")) {
+		if (strcmp(name + strlen(name) - strlen(" (deleted)"), " (deleted)") == 0) {
+			suffix = &name[strlen(name) - strlen(" (deleted)")];
+			*suffix = 0;
+		} else if (memcmp(name, "(deleted) ", strlen("(deleted) ")) == 0) {
+			memmove(name, name + strlen("(deleted) "), strlen(name) - strlen(" (deleted)") + 1);
+			suffix = name + strlen(name);
+		}
+	}
+	/* First pass uses the original path; on failure one more pass runs under /tmp. */
+try_again:
+	for (;;) {
+		if (attempt) {
+			if (attempt > 1000) {
+				eprintk_ctx("open_deleted: failed after %d attempts\n", attempt);
+				return ERR_PTR(-EEXIST);
+			}
+			if (suffix == NULL) {
+				eprintk_ctx("open_deleted: no suffix\n");
+				return ERR_PTR(-EEXIST);
+			}
+			sprintf(suffix, ".%08x", (unsigned)((xtime.tv_nsec>>10)+attempt));
+		}
+		attempt++;
+		/* Create an object of the recorded type, open it, then remove the name. */
+		if (S_ISFIFO(mode)) {
+			int err;
+			err = sc_mknod(name, S_IFIFO|(mode&017777), 0);
+			if (err == -EEXIST)	/* name is taken: retry with a new suffix */
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = open_pipe(name, fi, flags, ctx);
+			sc_unlink(name);
+		} else if (S_ISCHR(mode)) {
+			int err;
+			err = sc_mknod(name, S_IFCHR|(mode&017777), new_encode_dev(ii->cpt_rdev));
+			if (err == -EEXIST)
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = filp_open(name, flags, mode&017777);
+			sc_unlink(name);
+		} else if (S_ISDIR(mode)) {
+			int err;
+			err = sc_mkdir(name, mode&017777);
+			if (err == -EEXIST)
+				continue;
+			if (err < 0 && !tmp_pass)
+				goto change_dir;
+			if (err < 0)
+				return ERR_PTR(err);
+			file = filp_open(name, flags, mode&017777);
+			sc_rmdir(name);
+		} else {
+			file = filp_open(name, O_CREAT|O_EXCL|flags, mode&017777);
+			if (IS_ERR(file)) {
+				if (PTR_ERR(file) == -EEXIST)
+					continue;
+				if (!tmp_pass)
+					goto change_dir;
+			} else {
+				sc_unlink(name);
+			}
+		}
+		break;
+	}
+
+	if (IS_ERR(file)) {
+		eprintk_ctx("filp_open %s: %ld\n", name, PTR_ERR(file));
+		return file;
+	} else {
+		dprintk_ctx("deleted file created as %s, %p, %x\n", name, file, file->f_dentry->d_inode->i_mode);
+	}
+	return file;
+	/* Switch to a scratch name under /tmp; tmp_pass makes this a one-time retry. */
+change_dir:
+	sprintf(name, "/tmp/rst%u", current->pid);
+	suffix = name + strlen(name);
+	attempt = 1;
+	tmp_pass = 1;
+	goto try_again;
+}
+/* Restore the file whose image is at @pos.  Returns a referenced file, NULL (deferred) or ERR_PTR(). */
+struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx)
+{
+	int err;
+	int was_dentry_open = 0;
+	cpt_object_t *obj;
+	cpt_object_t *iobj;
+	struct cpt_file_image fi;
+	__u8 *name = NULL;
+	struct file *file;
+	int flags;
+	/* Already restored at this position: hand out another reference. */
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, pos, ctx);
+	if (obj) {
+		file = obj->o_obj;
+		if (obj->o_index >= 0) {
+			dprintk_ctx("file is attached to a socket\n");
+			err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx);
+			if (err < 0)
+				goto err_out;
+			fixup_file_flags(file, &fi, 0, pos, ctx);
+		}
+		get_file(file);
+		return file;
+	}
+
+	err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx);
+	if (err < 0)
+		goto err_out;
+
+	flags = make_flags(&fi);
+
+	/* Easy way, inode has been already open. */
+	if (fi.cpt_inode != CPT_NULL &&
+	    !(fi.cpt_lflags & CPT_DENTRY_CLONING) &&
+	    (iobj = lookup_cpt_obj_bypos(CPT_OBJ_INODE, fi.cpt_inode, ctx)) != NULL &&
+	    iobj->o_parent) {
+		struct file *filp = iobj->o_parent;
+		file = dentry_open(dget(filp->f_dentry),
+				   mntget(filp->f_vfsmnt), flags);
+		dprintk_ctx("rst_file: file obtained by dentry_open\n");
+		was_dentry_open = 1;
+		goto map_file;
+	}
+	/* Otherwise the file is opened by name; the name is read from behind the image. */
+	if (fi.cpt_next > fi.cpt_hdrlen)
+		name = rst_get_name(pos + sizeof(fi), ctx);
+
+	if (!name) {
+		eprintk_ctx("no name for file?\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+	/* Files flagged as deleted are recreated; all others are opened in place. */
+	if (fi.cpt_lflags & CPT_DENTRY_DELETED) {
+		struct cpt_inode_image ii;
+		if (fi.cpt_inode == CPT_NULL) {
+			eprintk_ctx("deleted file and no inode.\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		err = rst_get_object(CPT_OBJ_INODE, fi.cpt_inode, &ii, ctx);
+		if (err)
+			goto err_out;
+
+		if (ii.cpt_next > ii.cpt_hdrlen) {
+			struct cpt_object_hdr hdr;
+			err = ctx->pread(&hdr, sizeof(hdr), ctx,
+					fi.cpt_inode + ii.cpt_hdrlen);
+			if (err)
+				goto err_out;
+			if (hdr.cpt_object == CPT_OBJ_NAME) {
+				rst_put_name(name, ctx);
+				name = rst_get_name(fi.cpt_inode+ii.cpt_hdrlen,
+						ctx);
+				if (!name) {
+					eprintk_ctx("no name for link?\n");
+					err = -EINVAL;
+					goto err_out;
+				}
+				goto open_file;
+			}
+		}
+
+		/* One very special case... */
+		if (S_ISREG(fi.cpt_i_mode) &&
+		    (!name[0] || strcmp(name, "/dev/zero (deleted)") == 0)) {
+			/* MAP_ANON|MAP_SHARED mapping.
+			 * The kernel handles this in an ugly way: the file
+			 * passed to mmap by the user does not match the file
+			 * finally attached to the VMA. rst_mm has to take
+			 * care of this, otherwise the restore will fail.
+			 */
+			file = NULL;
+		} else if (S_ISREG(fi.cpt_i_mode) ||
+			   S_ISCHR(fi.cpt_i_mode) ||
+			   S_ISFIFO(fi.cpt_i_mode) ||
+			   S_ISDIR(fi.cpt_i_mode)) {
+			if (S_ISCHR(fi.cpt_i_mode)) {
+				file = open_special(&fi, flags, 1, ctx);
+				if (file != NULL)
+					goto map_file;
+			}
+			file = open_deleted(name, flags, &fi, &ii, ctx);
+			if (IS_ERR(file))
+				goto out;
+		} else {
+			eprintk_ctx("not a regular deleted file.\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+		/* May create the file when it is still NULL (see the special case above). */
+		err = fixup_file_content(&file, &fi, &ii, ctx);
+		if (err)
+			goto err_put;
+		goto map_file;
+	} else {
+open_file:
+		if (!name[0]) {
+			eprintk_ctx("empty name for file?\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+		if ((fi.cpt_lflags & CPT_DENTRY_EPOLL) &&
+		    (file = cpt_open_epolldev(&fi, flags, ctx)) != NULL)
+			goto map_file;
+#ifdef CONFIG_INOTIFY_USER
+		if ((fi.cpt_lflags & CPT_DENTRY_INOTIFY) &&
+		    (file = rst_open_inotify(&fi, flags, ctx)) != NULL)
+			goto map_file;
+#else
+		if (fi.cpt_lflags & CPT_DENTRY_INOTIFY) {
+			err = -EINVAL;
+			goto err_out;
+		}
+#endif
+		if (S_ISFIFO(fi.cpt_i_mode) &&
+		    (file = open_pipe(name, &fi, flags, ctx)) != NULL)
+			goto map_file;
+		if (!S_ISREG(fi.cpt_i_mode) &&
+		    (file = open_special(&fi, flags, 0, ctx)) != NULL)
+			goto map_file;
+	}
+	/* No special handler produced a file: plain open by name. */
+	file = filp_open(name, flags, 0);
+
+map_file:
+	if (!IS_ERR(file)) {
+		fixup_file_flags(file, &fi, was_dentry_open, pos, ctx);
+		/* Pipe data is restored only when the file did not come from dentry_open(). */
+		if (S_ISFIFO(fi.cpt_i_mode) && !was_dentry_open) {
+			err = fixup_pipe_data(file, &fi, ctx);
+			if (err)
+				goto err_put;
+		}
+
+		/* This is very special hack. Logically, cwd/root are
+		 * nothing but open directories. Nevertheless, this causes
+		 * failures of restores, when number of open files in VE
+		 * is close to limit. So, if it is rst_file() of cwd/root
+		 * (fd = -2) and the directory is not deleted, we skip
+		 * adding files to object table. If the directory is
+		 * not unlinked, this cannot cause any problems.
+		 */
+		if (fd != -2 ||
+		    !S_ISDIR(file->f_dentry->d_inode->i_mode) ||
+		    (fi.cpt_lflags & CPT_DENTRY_DELETED)) {
+			obj = cpt_object_get(CPT_OBJ_FILE, file, ctx);
+			if (!obj) {
+				obj = cpt_object_add(CPT_OBJ_FILE, file, ctx);
+				if (obj)
+					get_file(file);	/* reference held for the new table entry */
+			}
+			if (obj)
+				cpt_obj_setpos(obj, pos, ctx);
+
+			obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
+			if (obj) {
+				cpt_obj_setpos(obj, fi.cpt_inode, ctx);
+				if (!obj->o_parent || !(fi.cpt_lflags & CPT_DENTRY_DELETED))
+					obj->o_parent = file;
+			}
+		}
+		/* BSD flock records, if present, are stored after the file image. */
+		if (fi.cpt_next > fi.cpt_hdrlen) {
+			err = fixup_flocks(file, &fi, pos, ctx);
+			if (err)
+				goto err_put;
+		}
+	} else {
+		if (fi.cpt_lflags & CPT_DENTRY_PROC) {
+			dprintk_ctx("rst_file /proc delayed\n");
+			file = NULL;	/* callers see NULL rather than an error */
+		} else if (name)
+			eprintk_ctx("can't open file %s\n", name);
+	}
+
+out:
+	if (name)
+		rst_put_name(name, ctx);
+	return file;
+
+err_put:
+	if (file)
+		fput(file);
+err_out:
+	if (name)
+		rst_put_name(name, ctx);
+	return ERR_PTR(err);
+}
+
+
+/* Compute the clone flags for restoring task @ti: CLONE_FILES / CLONE_FS are
+ * set when the task has no files / fs image, or when that image has already
+ * been restored and is therefore found in the object table. */
+__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int share_files = ti->cpt_files == CPT_NULL ||
+		lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx) != NULL;
+	int share_fs = ti->cpt_fs == CPT_NULL ||
+		lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx) != NULL;
+
+	return (share_files ? CLONE_FILES : 0) | (share_fs ? CLONE_FS : 0);
+}
+
+/* Close every descriptor open in @files and clear the open/close-on-exec
+ * bitmaps, walking the open_fds bitmap one word at a time. */
+static void local_close_files(struct files_struct * files)
+{
+	int word;
+
+	for (word = 0; ; word++) {
+		int fd = word * __NFDBITS;
+		unsigned long bits;
+
+		if (fd >= files->fdt->max_fdset || fd >= files->fdt->max_fds)
+			return;
+		bits = files->fdt->open_fds->fds_bits[word];
+		for (; bits != 0; bits >>= 1, fd++) {
+			struct file *filp;
+			if (!(bits & 1))
+				continue;
+			filp = xchg(&files->fdt->fd[fd], NULL);
+			if (filp)
+				filp_close(filp, files);
+		}
+		files->fdt->open_fds->fds_bits[word] = 0;
+		files->fdt->close_on_exec->fds_bits[word] = 0;
+	}
+}
+
+extern int expand_fdtable(struct files_struct *files, int nr);
+
+
+/* Give the current task the files_struct recorded in task image @ti: none,
+ * one shared with an already restored task, or its own table refilled from
+ * the image.  Returns 0 or -errno (-EINVAL for an fd outside the table). */
+int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct cpt_files_struct_image fi;
+	struct cpt_fd_image fdi;
+	struct files_struct *f = current->files;
+	cpt_object_t *obj;
+	loff_t pos, endpos;
+	int err;
+
+	if (ti->cpt_files == CPT_NULL) {
+		current->files = NULL;
+		if (f)
+			put_files_struct(f);
+		return 0;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx);
+	if (obj) {
+		if (obj->o_obj != f) {
+			put_files_struct(f);
+			f = obj->o_obj;
+			atomic_inc(&f->count);
+			current->files = f;
+		}
+		return 0;
+	}
+
+	err = rst_get_object(CPT_OBJ_FILES, ti->cpt_files, &fi, ctx);
+	if (err)
+		return err;
+	local_close_files(f);
+	if (fi.cpt_max_fds > f->fdt->max_fds) {
+		spin_lock(&f->file_lock);
+		err = expand_fdtable(f, fi.cpt_max_fds-1);
+		spin_unlock(&f->file_lock);
+		if (err)
+			return err;
+	}
+	endpos = ti->cpt_files + fi.cpt_next;
+	for (pos = ti->cpt_files + fi.cpt_hdrlen; pos < endpos; pos += fdi.cpt_next) {
+		struct file *filp;
+		err = rst_get_object(CPT_OBJ_FILEDESC, pos, &fdi, ctx);
+		if (err)
+			return err;
+		filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
+		if (IS_ERR(filp)) {
+			eprintk_ctx("rst_file: %ld %Lu\n", PTR_ERR(filp),
+				    (long long)fdi.cpt_file);
+			return PTR_ERR(filp);
+		}
+		if (filp == NULL) {
+			err = rst_filejob_queue(pos, ctx);
+			if (err)
+				return err;
+			continue;
+		}
+		if (fdi.cpt_fd >= f->fdt->max_fds) {
+			eprintk_ctx("rst_files_complete: fd %d is out of range\n", (int)fdi.cpt_fd);
+			fput(filp);
+			return -EINVAL;
+		}
+		f->fdt->fd[fdi.cpt_fd] = filp;
+		FD_SET(fdi.cpt_fd, f->fdt->open_fds);
+		if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
+			FD_SET(fdi.cpt_fd, f->fdt->close_on_exec);
+	}
+	f->fdt->next_fd = fi.cpt_next_fd;
+	obj = cpt_object_add(CPT_OBJ_FILES, f, ctx);
+	if (obj) {
+		cpt_obj_setpos(obj, ti->cpt_files, ctx);
+		cpt_obj_setindex(obj, fi.cpt_index, ctx);
+	}
+	return 0;
+}
+/* Run the queued file jobs: restore each deferred file and install it in its task's fd table. */
+int rst_do_filejobs(cpt_context_t *ctx)
+{
+	struct filejob *j;
+	/* A job is dequeued only after it succeeded; on error it stays at the head. */
+	while ((j = ctx->filejob_queue) != NULL) {
+		int err;
+		task_t *tsk;
+		struct cpt_fd_image fdi;
+		struct file *filp;
+		/* Look the task up by j->pid and pin it with a reference. */
+		read_lock(&tasklist_lock);
+		tsk = find_task_by_pid_ve(j->pid);
+		if (tsk)
+			get_task_struct(tsk);
+		read_unlock(&tasklist_lock);
+		if (!tsk)
+			return -EINVAL;
+		/* j->fdi is the image position of the fd record to replay. */
+		err = rst_get_object(CPT_OBJ_FILEDESC, j->fdi, &fdi, ctx);
+		if (err) {
+			put_task_struct(tsk);
+			return err;
+		}
+		/* NOTE(review): this BUG() is reachable with an inconsistent image. */
+		if (fdi.cpt_fd >= tsk->files->fdt->max_fds) BUG();
+		if (tsk->files->fdt->fd[fdi.cpt_fd] ||
+		    FD_ISSET(fdi.cpt_fd, tsk->files->fdt->open_fds)) {
+			eprintk_ctx("doing filejob %Ld: fd is busy\n", j->fdi);
+			put_task_struct(tsk);
+			return -EBUSY;
+		}
+
+		filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
+		if (IS_ERR(filp)) {
+			eprintk_ctx("rst_do_filejobs: 1: %ld %Lu\n", PTR_ERR(filp), (unsigned long long)fdi.cpt_file);
+			put_task_struct(tsk);
+			return PTR_ERR(filp);
+		}
+		if (fdi.cpt_fd >= tsk->files->fdt->max_fds) BUG();
+		tsk->files->fdt->fd[fdi.cpt_fd] = filp;
+		FD_SET(fdi.cpt_fd, tsk->files->fdt->open_fds);
+		if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
+			FD_SET(fdi.cpt_fd, tsk->files->fdt->close_on_exec);
+
+		dprintk_ctx("filejob %Ld done\n", j->fdi);
+
+		put_task_struct(tsk);
+		ctx->filejob_queue = j->next;
+		kfree(j);
+	}
+	return 0;
+}
+
+/* Free every pending entry of ctx->filejob_queue without executing it. */
+void rst_flush_filejobs(cpt_context_t *ctx)
+{
+	struct filejob *job = ctx->filejob_queue;
+	for (; job != NULL; job = ctx->filejob_queue) {
+		ctx->filejob_queue = job->next;
+		kfree(job);
+	}
+}
+
+/* Settle the fs_struct of the current task according to task image @ti: drop
+ * it when the image has none, switch to an already restored shared one, or
+ * register the current one under the image position.  Always returns 0. */
+int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct fs_struct *cur_fs = current->fs;
+	cpt_object_t *obj;
+
+	if (ti->cpt_fs == CPT_NULL) {
+		exit_fs(current);
+		return 0;
+	}
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx);
+	if (obj == NULL) {
+		/* Do _not_ restore root. Image contains absolute pathnames.
+		 * So, we fix it in context of rst process.
+		 */
+		obj = cpt_object_add(CPT_OBJ_FS, cur_fs, ctx);
+		if (obj)
+			cpt_obj_setpos(obj, ti->cpt_fs, ctx);
+		return 0;
+	}
+	if (obj->o_obj != cur_fs) {
+		struct fs_struct *shared = obj->o_obj;
+		exit_fs(current);
+		atomic_inc(&shared->count);
+		current->fs = shared;
+	}
+	return 0;
+}
+
+/* Restore the file imaged at *pos and return referenced copies of its dentry
+ * and vfsmount in *dp / *mp; *pos is advanced on success.  Returns 0 or -errno. */
+int cpt_get_dentry(struct dentry **dp, struct vfsmount **mp,
+		   loff_t *pos, struct cpt_context *ctx)
+{
+	struct cpt_file_image fi;
+	struct file * file;
+	int err = rst_get_object(CPT_OBJ_FILE, *pos, &fi, ctx);
+	if (err)
+		return err;
+	file = rst_file(*pos, -2, ctx);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+	if (file == NULL)	/* rst_file() deferred the open: nothing to pin */
+		return -EINVAL;
+	*dp = dget(file->f_dentry);
+	*mp = mntget(file->f_vfsmnt);
+	*pos += fi.cpt_next;
+	fput(file);
+	return 0;
+}
+/* Install (@mnt, @dentry) as root of @fs under fs->lock, then release the old root. */
+static void __set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
+			  struct dentry *dentry)
+{
+	struct vfsmount *prev_mnt;
+	struct dentry *prev_root;
+	write_lock(&fs->lock);
+	prev_root = fs->root;
+	prev_mnt = fs->rootmnt;
+	fs->root = dentry;
+	fs->rootmnt = mnt;
+	write_unlock(&fs->lock);
+	if (!prev_root)
+		return;
+	dput(prev_root);
+	mntput(prev_mnt);
+}
+/* Install (@mnt, @dentry) as the working directory of @fs under fs->lock,
+ * then release the previous working directory outside the lock. */
+static void __set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
+			 struct dentry *dentry)
+{
+	struct vfsmount *prev_mnt;
+	struct dentry *prev_pwd;
+
+	write_lock(&fs->lock);
+	prev_pwd = fs->pwd;
+	prev_mnt = fs->pwdmnt;
+	fs->pwd = dentry;
+	fs->pwdmnt = mnt;
+	write_unlock(&fs->lock);
+	if (!prev_pwd)
+		return;
+	dput(prev_pwd);
+	mntput(prev_mnt);
+}
+
+
+/* For every restored fs_struct, apply the saved umask and re-point root, pwd
+ * and altroot at the (up to three) file images stored after the fs image,
+ * in that order.  Returns 0 or the first error encountered. */
+int rst_restore_fs(struct cpt_context *ctx)
+{
+	loff_t pos;
+	cpt_object_t *obj;
+	int err = 0;
+
+	for_each_object(obj, CPT_OBJ_FS) {
+		struct cpt_fs_struct_image fi;
+		struct fs_struct *fs = obj->o_obj;
+		struct dentry *d[3] = { NULL, NULL, NULL };
+		struct vfsmount *m[3] = { NULL, NULL, NULL };
+		int i = 0;
+
+		err = rst_get_object(CPT_OBJ_FS, obj->o_pos, &fi, ctx);
+		if (err)
+			return err;
+
+		fs->umask = fi.cpt_umask;
+		pos = obj->o_pos + fi.cpt_hdrlen;
+		while (pos < obj->o_pos + fi.cpt_next && i<3) {
+			err = cpt_get_dentry(d+i, m+i, &pos, ctx);
+			if (err) {
+				eprintk_ctx("cannot get_dir: %d\n", err);
+				/* Drop the references collected so far. */
+				for (--i; i >= 0; i--) {
+					if (d[i])
+						dput(d[i]);
+					if (m[i])
+						mntput(m[i]);
+				}
+				return err;
+			}
+			i++;
+		}
+		if (d[0])
+			__set_fs_root(fs, m[0], d[0]);
+		if (d[1])
+			__set_fs_pwd(fs, m[1], d[1]);
+		if (d[2]) {
+			struct dentry *olddentry;
+			struct vfsmount *oldmnt;
+			write_lock(&fs->lock);
+			oldmnt = fs->altrootmnt;
+			olddentry = fs->altroot;
+			fs->altrootmnt = m[2];
+			fs->altroot = d[2];
+			write_unlock(&fs->lock);
+
+			if (olddentry) {
+				dput(olddentry);
+				mntput(oldmnt);
+			}
+		}
+	}
+	return err;
+}
+
+/* Mount @mnttype on @mntpnt, as a bind mount of @mntbind when one is given.
+ * Per-mountpoint MNT_* bits in @mnt_flags are folded into the MS_* @flags.
+ * Returns 0 on success or the negative sc_mount() result. */
+int do_one_mount(char *mntpnt, char *mnttype, char *mntbind,
+		 unsigned long flags, unsigned long mnt_flags,
+		 struct cpt_context *ctx)
+{
+	int rc;
+
+	/* A bind source of "/" or "" is treated as no bind source at all. */
+	if (mntbind && (mntbind[0] == '\0' || strcmp(mntbind, "/") == 0))
+		mntbind = NULL;
+	if (mntbind != NULL)
+		flags |= MS_BIND;
+
+	/* Join per-mountpoint flags with global flags */
+	flags |= (mnt_flags & MNT_NOSUID) ? MS_NOSUID : 0;
+	flags |= (mnt_flags & MNT_NODEV) ? MS_NODEV : 0;
+	flags |= (mnt_flags & MNT_NOEXEC) ? MS_NOEXEC : 0;
+
+	rc = sc_mount(mntbind, mntpnt, mnttype, flags);
+	if (rc >= 0)
+		return 0;
+	eprintk_ctx("%d mounting %s %s %08lx\n", rc, mntpnt, mnttype, flags);
+	return rc;
+}
+/* Thread body: exec "/bin/tar x -C / -S" with stdin from pfd[0] and fds 1 and 2 on /dev/null. */
+static int undumptmpfs(void *arg)
+{
+	int i;
+	int *pfd = arg;
+	int fd1, fd2, err;
+	char *argv[] = { "tar", "x", "-C", "/", "-S", NULL };
+	/* pfd[0] becomes fd 0 unless it already is. */
+	if (pfd[0] != 0)
+		sc_dup2(pfd[0], 0);
+	/* Returns only on failure, with 255 << 8 as the result. */
+	set_fs(KERNEL_DS);
+	fd1 = sc_open("/dev/null", O_WRONLY, 0);
+	fd2 = sc_open("/dev/null", O_WRONLY, 0);
+try:
+	if (fd1 < 0 || fd2 < 0) {
+		if (fd1 == -ENOENT && fd2 == -ENOENT) {
+			/* No /dev/null: create a temporary node just for the two opens. */
+			err = sc_mknod("/dev/null", S_IFCHR|0666,
+					new_encode_dev((MEM_MAJOR<<MINORBITS)|3));
+			if (err < 0) {
+				eprintk("can't create /dev/null: %d\n", err);
+				module_put(THIS_MODULE);
+				return 255 << 8;
+			}
+			fd1 = sc_open("/dev/null", O_WRONLY, 0666);
+			fd2 = sc_open("/dev/null", O_WRONLY, 0666);
+			sc_unlink("/dev/null");
+			goto try;
+		}
+		eprintk("can not open /dev/null for tar: %d %d\n", fd1, fd2);
+		module_put(THIS_MODULE);
+		return 255 << 8;
+	}
+	if (fd1 != 1)
+		sc_dup2(fd1, 1);
+	if (fd2 != 2)
+		sc_dup2(fd2, 2);
+	/* Close everything above fd 2 before the exec. */
+	for (i = 3; i < current->files->fdt->max_fds; i++)
+		sc_close(i);
+	/* NOTE(review): each exit path drops a module reference, presumably taken by the spawner. */
+	module_put(THIS_MODULE);
+
+	i = sc_execve("/bin/tar", argv, NULL);
+	eprintk("failed to exec /bin/tar: %d\n", i);
+	return 255 << 8;
+}
+
+/* Refill a freshly mounted tmpfs: the payload of the CPT_OBJ_NAME object at
+ * *pos is streamed through a pipe to a child thread running undumptmpfs(),
+ * and *pos is moved past the object.  Returns 0 or a negative errno. */
+static int rst_restore_tmpfs(loff_t *pos, struct cpt_context * ctx)
+{
+	int err;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	int n, pid, status;
+	loff_t end;
+	mm_segment_t oldfs;
+	sigset_t ignore, blocked;
+
+	err = rst_get_object(CPT_OBJ_NAME, *pos, &v, ctx);
+	if (err < 0)
+		return err;
+
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	/* siginitset() also clears the words above sig[0]; none is left as stack garbage. */
+	siginitset(&ignore, CPT_SIG_IGNORE_MASK);
+	sigprocmask(SIG_BLOCK, &ignore, &blocked);
+	pid = err = local_kernel_thread(undumptmpfs, (void*)pfd, SIGCHLD, 0);
+	if (err < 0) {
+		eprintk_ctx("tmpfs local_kernel_thread: %d\n", err);
+		goto out;
+	}
+	f = fget(pfd[1]);
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	ctx->file->f_pos = *pos + v.cpt_hdrlen;
+	end = *pos + v.cpt_next;
+	*pos += v.cpt_next;
+	do {
+		char buf[16];
+
+		n = end - ctx->file->f_pos;
+		if (n > sizeof(buf))
+			n = sizeof(buf);
+		if (ctx->read(buf, n, ctx))
+			break;
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		f->f_op->write(f, buf, n, &f->f_pos);
+		set_fs(oldfs);
+	} while (ctx->file->f_pos < end);
+	fput(f);
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if ((err = sc_waitx(pid, 0, &status)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+	else if ((status & 0x7f) == 0) {
+		err = (status & 0xff00) >> 8;
+		if (err != 0) {
+			eprintk_ctx("tar exited with %d\n", err);
+			err = -EINVAL;
+		}
+	} else {
+		eprintk_ctx("tar terminated\n");
+		err = -EINVAL;
+	}
+	set_fs(oldfs);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+
+	return err;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+	return err;
+}
+
+/* Check that the current namespace has a mount whose path is @mntpnt and
+ * whose filesystem type is @mnttype.
+ * Returns 0 when such a mount exists and -ENOENT otherwise. */
+int check_ext_mount(char *mntpnt, char *mnttype, struct cpt_context *ctx)
+{
+	struct namespace *ns = current->namespace;
+	struct vfsmount *mnt;
+	char *buf;
+	int found = 0;
+
+	buf = cpt_get_buf(ctx);
+	down_read(&namespace_sem);
+	list_for_each_entry(mnt, &ns->list, mnt_list) {
+		char *where = d_path(mnt->mnt_root, mnt, buf, PAGE_SIZE);
+
+		if (IS_ERR(where) || strcmp(where, mntpnt) != 0)
+			continue;
+		if (strcmp(mnt->mnt_sb->s_type->name, mnttype) == 0) {
+			found = 1;
+			break;
+		}
+	}
+	up_read(&namespace_sem);
+	__cpt_release_buf(ctx);
+	return found ? 0 : -ENOENT;
+}
+/* Restore the mount(s) described by vfsmount image @mi, which is located at @pos. */
+int restore_one_vfsmount(struct cpt_vfsmount_image *mi, loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t endpos;
+	/* Returns 0, or the first error from mounting, refilling or checking. */
+	endpos = pos + mi->cpt_next;
+	pos += mi->cpt_hdrlen;
+
+	while (pos < endpos) {
+		char *mntdev;
+		char *mntpnt;
+		char *mnttype;
+		char *mntbind;
+		/* Names are read in fixed order: device, mount point, type[, bind source]. */
+		mntdev = __rst_get_name(&pos, ctx);
+		mntpnt = __rst_get_name(&pos, ctx);
+		mnttype = __rst_get_name(&pos, ctx);
+		mntbind = NULL;
+		if (mi->cpt_mntflags & CPT_MNT_BIND)
+			mntbind = __rst_get_name(&pos, ctx);
+		err = -EINVAL;	/* stays unless both type and mount point were read */
+		if (mnttype && mntpnt) {
+			err = 0;
+			if (!(mi->cpt_mntflags & CPT_MNT_EXT) &&
+			    strcmp(mntpnt, "/")) {	/* "/" itself is never mounted here */
+				err = do_one_mount(mntpnt, mnttype, mntbind,
+						   mi->cpt_flags,
+						   mi->cpt_mntflags, ctx);
+				if (!err &&
+				    strcmp(mnttype, "tmpfs") == 0 &&
+				    !(mi->cpt_mntflags & (CPT_MNT_BIND)))
+					    err = rst_restore_tmpfs(&pos, ctx);
+			} else if (mi->cpt_mntflags & CPT_MNT_EXT) {
+				err = check_ext_mount(mntpnt, mnttype, ctx);	/* must already exist */
+				if (err)
+					eprintk_ctx("mount point is missing: %s\n", mntpnt);
+			}
+		}
+		if (mntdev)
+			rst_put_name(mntdev, ctx);
+		if (mntpnt)
+			rst_put_name(mntpnt, ctx);
+		if (mnttype)
+			rst_put_name(mnttype, ctx);
+		if (mntbind)
+			rst_put_name(mntbind, ctx);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/* Restore each CPT_OBJ_VFSMOUNT record found in image range [pos, endpos).
+ * Returns 0, or the first error from reading or restoring a record. */
+int restore_one_namespace(loff_t pos, loff_t endpos, struct cpt_context *ctx)
+{
+	struct cpt_vfsmount_image mnt_img;
+	int rc;
+
+	for (; pos < endpos; pos += mnt_img.cpt_next) {
+		rc = rst_get_object(CPT_OBJ_VFSMOUNT, pos, &mnt_img, ctx);
+		if (!rc)
+			rc = restore_one_vfsmount(&mnt_img, pos, ctx);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+/* Restore the mount namespace from the CPT_SECT_NAMESPACE section, if the
+ * image has one.  Only the first namespace object is restored; a second one
+ * is reported and ends the walk.  Returns 0 or a negative errno. */
+int rst_root_namespace(struct cpt_context *ctx)
+{
+	struct cpt_section_hdr sect;
+	struct cpt_object_hdr ns_hdr;
+	loff_t cur = ctx->sections[CPT_SECT_NAMESPACE];
+	loff_t limit;
+	int restored = 0;
+	int rc;
+
+	if (cur == CPT_NULL)
+		return 0;
+
+	rc = ctx->pread(&sect, sizeof(sect), ctx, cur);
+	if (rc)
+		return rc;
+	if (sect.cpt_section != CPT_SECT_NAMESPACE || sect.cpt_hdrlen < sizeof(sect))
+		return -EINVAL;
+
+	limit = cur + sect.cpt_next;
+	for (cur += sect.cpt_hdrlen; cur < limit; cur += ns_hdr.cpt_next) {
+		rc = rst_get_object(CPT_OBJ_NAMESPACE, cur, &ns_hdr, ctx);
+		if (rc)
+			return rc;
+		if (restored) {
+			eprintk_ctx("multiple namespaces are not supported\n");
+			break;
+		}
+		restored = 1;
+		rc = restore_one_namespace(cur+ns_hdr.cpt_hdrlen, cur+ns_hdr.cpt_next, ctx);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+/* Scan the CPT_SECT_FILES section for file images that no restored object
+ * refers to ("stray" files), hand each one to rst_sysv_shm() and drop the
+ * returned reference.  Returns 0 or a negative errno. */
+int rst_stray_files(struct cpt_context *ctx)
+{
+	struct cpt_section_hdr sect;
+	loff_t cur = ctx->sections[CPT_SECT_FILES];
+	loff_t limit;
+	int rc;
+
+	/* The image may have no files section at all. */
+	if (cur == CPT_NULL)
+		return 0;
+
+	rc = ctx->pread(&sect, sizeof(sect), ctx, cur);
+	if (rc)
+		return rc;
+	if (sect.cpt_section != CPT_SECT_FILES || sect.cpt_hdrlen < sizeof(sect))
+		return -EINVAL;
+
+	limit = cur + sect.cpt_next;
+	cur += sect.cpt_hdrlen;
+	while (cur < limit) {
+		struct cpt_object_hdr hdr;
+		struct file *stray;
+
+		/* Only the generic header is read; cpt_next steps to the next object. */
+		rc = _rst_get_object(CPT_OBJ_FILE, cur, &hdr, sizeof(hdr), ctx);
+		if (rc)
+			return rc;
+
+		if (lookup_cpt_obj_bypos(CPT_OBJ_FILE, cur, ctx) == NULL) {
+			dprintk_ctx("stray file %Ld\n", cur);
+
+			stray = rst_sysv_shm(cur, ctx);
+			if (IS_ERR(stray)) {
+				eprintk_ctx("rst_stray_files: %ld\n", PTR_ERR(stray));
+				return PTR_ERR(stray);
+			}
+			fput(stray);
+		}
+		cur += hdr.cpt_next;
+	}
+
+	return 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_inotify.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_inotify.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_inotify.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_inotify.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,199 @@
+/*
+ *
+ *  kernel/cpt/rst_inotify.c
+ *
+ *  Copyright (C) 2000-2007  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/namespace.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/inotify.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
+extern struct file_operations inotify_fops;
+
+/* Open a fresh inotify instance; returns the file or ERR_PTR(), never NULL. */
+struct file *rst_open_inotify(struct cpt_file_image *fi, unsigned flags,
+			      struct cpt_context *ctx)
+{
+	struct file *file;
+	int fd = sys_inotify_init();
+
+	if (fd < 0)
+		return ERR_PTR(fd);
+
+	/* Keep only the file reference; the descriptor slot is temporary. */
+	file = fget(fd);
+	sys_close(fd);
+	return file ? file : ERR_PTR(-EBADF);
+}
+
+/* Rebuild one inotify instance: @obj holds the restored inotify file, @pos and
+ * @ibuf locate its CPT_OBJ_INOTIFY image.  Child objects are replayed in file
+ * order: watches are re-created and must get their recorded descriptors, queued
+ * events are fed to the handle_event() callback.  Returns 0 or -errno. */
+static int restore_one_inotify(cpt_object_t *obj, loff_t pos,
+			       struct cpt_inotify_image *ibuf,
+			       cpt_context_t *ctx)
+{
+	struct file *file = obj->o_obj;
+	struct inotify_device *dev;
+	loff_t end = pos + ibuf->cpt_next;
+	int err = 0;
+
+	if (file->f_op != &inotify_fops) {
+		eprintk_ctx("bad inotify file\n");
+		return -EINVAL;
+	}
+
+	dev = file->private_data;
+	if (unlikely(dev == NULL)) {
+		eprintk_ctx("bad inotify device\n");
+		return -EINVAL;
+	}
+
+	pos += ibuf->cpt_hdrlen;
+	while (pos < end) {
+		union {
+			struct cpt_inotify_wd_image wi;
+			struct cpt_inotify_ev_image ei;
+		} u;
+
+		err = rst_get_object(-1, pos, &u, ctx);
+		if (err) {
+			eprintk_ctx("rst_get_object: %d\n", err);
+			return err;
+		}
+		if (u.wi.cpt_object != CPT_OBJ_INOTIFY_WATCH &&
+		    u.wi.cpt_object != CPT_OBJ_INOTIFY_EVENT) {
+			eprintk_ctx("bad object: %u\n", u.wi.cpt_object);
+			return -EINVAL;
+		}
+		if (u.wi.cpt_object == CPT_OBJ_INOTIFY_WATCH) {
+			struct dentry *dentry;
+			struct vfsmount *mnt;
+			loff_t fpos = pos + u.wi.cpt_hdrlen;
+			int wd;
+
+			err = cpt_get_dentry(&dentry, &mnt, &fpos, ctx);
+			if (err) {
+				eprintk_ctx("cpt_get_dentry: %d\n", err);
+				return err;
+			}
+
+			/* last_wd is preset to cpt_wd - 1, presumably so the new watch
+			 * gets descriptor cpt_wd (verified below), then put back. */
+			mutex_lock(&dev->up_mutex);
+			dev->ih->last_wd = u.wi.cpt_wd - 1;
+			wd = inotify_create_watch(dev, dentry, mnt, u.wi.cpt_mask);
+			dev->ih->last_wd = ibuf->cpt_last_wd;
+			if (wd == u.wi.cpt_wd) {
+				err = 0;
+			} else {
+				eprintk_ctx("wrong inotify descriptor %u %u\n", wd, u.wi.cpt_wd);
+				err = (wd < 0) ? wd : -EINVAL;
+			}
+			mutex_unlock(&dev->up_mutex);
+			dput(dentry);
+			mntput(mnt);
+			if (err)
+				return err;
+		} else {
+			struct inotify_user_watch dummy_watch;
+			struct inotify_watch *w = &dummy_watch.wdata;
+			char *name = NULL;
+
+			if (u.ei.cpt_namelen) {
+				name = kmalloc(u.ei.cpt_namelen+1, GFP_KERNEL);
+				if (name == NULL)
+					return -ENOMEM;
+				name[u.ei.cpt_namelen] = 0;
+				err = ctx->pread(name, u.ei.cpt_namelen, ctx, pos + u.ei.cpt_hdrlen);
+				if (err) {
+					kfree(name);
+					return err;
+				}
+			}
+
+			/* Trick to avoid destruction due to exit event */
+			dummy_watch.dev = dev;
+			atomic_set(&w->count, 2);
+			if (u.ei.cpt_mask & (IN_IGNORED | IN_ONESHOT))
+				atomic_inc(&w->count);
+			dev->ih->in_ops->handle_event(w, u.ei.cpt_wd, u.ei.cpt_mask,
+						      u.ei.cpt_cookie, name, NULL);
+			kfree(name);
+		}
+		pos += u.wi.cpt_next;
+	}
+	return err;
+}
+
+/* Restore every inotify instance stored in the CPT_SECT_INOTIFY section.
+ * Each image must reference a file object present in the object table.
+ * Returns 0 (also when the section is absent) or a negative error code. */
+int rst_inotify(cpt_context_t *ctx)
+{
+	struct cpt_section_hdr hdr;
+	struct cpt_inotify_image img;
+	cpt_object_t *fobj;
+	loff_t pos, end;
+	int err;
+
+	pos = ctx->sections[CPT_SECT_INOTIFY];
+	if (pos == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&hdr, sizeof(hdr), ctx, pos);
+	if (err)
+		return err;
+	if (hdr.cpt_section != CPT_SECT_INOTIFY || hdr.cpt_hdrlen < sizeof(hdr))
+		return -EINVAL;
+
+	end = pos + hdr.cpt_next;
+	for (pos += hdr.cpt_hdrlen; pos < end; pos += img.cpt_next) {
+		err = rst_get_object(CPT_OBJ_INOTIFY, pos, &img, ctx);
+		if (err)
+			return err;
+
+		fobj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, img.cpt_file, ctx);
+		if (fobj == NULL) {
+			eprintk_ctx("cannot find inotify file object\n");
+			return -EINVAL;
+		}
+		err = restore_one_inotify(fobj, pos, &img, ctx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_mm.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_mm.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_mm.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_mm.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,1098 @@
+/*
+ *
+ *  kernel/cpt/rst_mm.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <linux/hugetlb.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+#include <linux/vmalloc.h>
+#include <linux/rmap.h>
+#include <linux/hash.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#ifdef CONFIG_X86
+#include <asm/ldt.h>
+#include <asm/desc.h>
+#endif
+#include <asm/mmu_context.h>
+#include <linux/swapops.h>
+#include <linux/cpt_image.h>
+
+#ifdef CONFIG_VE
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+#endif
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_ubc.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+#include "cpt_pagein.h"
+#endif
+
+#include "cpt_syscalls.h"
+
+#define __PAGE_NX (1ULL<<63)
+
+/* Translate the VM_* bits saved in the image into mmap() PROT_* bits. */
+static unsigned long make_prot(struct cpt_vma_image *vmai)
+{
+	static const struct { unsigned long vm, prot; } map[] = {
+		{ VM_READ, PROT_READ },		{ VM_WRITE, PROT_WRITE },
+		{ VM_EXEC, PROT_EXEC },		{ VM_GROWSDOWN, PROT_GROWSDOWN },
+		{ VM_GROWSUP, PROT_GROWSUP },
+	};
+	unsigned long prot = 0;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(map); i++)
+		if (vmai->cpt_flags & map[i].vm)
+			prot |= map[i].prot;
+	return prot;
+}
+
+/* Build the mmap() flags needed to re-create the VMA described by @vmai.
+ * The mapping is always MAP_FIXED; the remaining bits mirror the saved
+ * vm_flags and whether the area has a backing file. */
+static unsigned long make_flags(struct cpt_vma_image *vmai)
+{
+	const int shared = (vmai->cpt_flags & (VM_SHARED|VM_MAYSHARE)) != 0;
+	unsigned long flags = MAP_FIXED | (shared ? MAP_SHARED : MAP_PRIVATE);
+
+	if (vmai->cpt_file == CPT_NULL)
+		flags |= MAP_ANONYMOUS;
+	if (!(vmai->cpt_flags & VM_ACCOUNT))
+		flags |= MAP_NORESERVE;
+	if (vmai->cpt_flags & VM_GROWSDOWN)
+		flags |= MAP_GROWSDOWN;
+#ifdef MAP_GROWSUP
+	if (vmai->cpt_flags & VM_GROWSUP)
+		flags |= MAP_GROWSUP;
+#endif
+	if (vmai->cpt_flags & VM_DENYWRITE)
+		flags |= MAP_DENYWRITE;
+	if (vmai->cpt_flags & VM_EXECUTABLE)
+		flags |= MAP_EXECUTABLE;
+
+	return flags;
+}
+
+#ifdef CONFIG_X86
+#if !defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) \
+				&& !defined(CONFIG_XEN)
+/* Grow the page-based LDT of @pc to hold at least @mincount entries.
+ * Returns 0, -ENOMEM, or -EINVAL when more than 64K of LDT would be needed
+ * (the count comes from the image, so this must not BUG()). */
+static int __alloc_ldt(mm_context_t *pc, int mincount)
+{
+	int newsize, i;
+
+	if (mincount <= pc->size)
+		return 0;
+	mincount = (mincount+511)&(~511);
+	if (mincount > 64*1024/LDT_ENTRY_SIZE)
+		return -EINVAL;
+	newsize = mincount*LDT_ENTRY_SIZE;
+	for (i = 0; i < newsize; i += PAGE_SIZE) {
+		int nr = i/PAGE_SIZE;
+		if (!pc->ldt_pages[nr]) {
+			pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER|__GFP_UBC);
+			if (!pc->ldt_pages[nr])
+				return -ENOMEM;
+			clear_highpage(pc->ldt_pages[nr]);
+		}
+	}
+	pc->size = mincount;
+	return 0;
+}
+
+/* Restore the LDT of current->mm from the CPT_OBJ_BITS object at @pos
+ * (variant for an LDT kept as an array of pages): grow the table if needed,
+ * fill it page by page from the image and load it.  Returns 0 or -errno. */
+static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx)
+{
+	mm_context_t *pc = &current->mm->context;
+	loff_t data = pos + li->cpt_hdrlen;
+	int total, off, err;
+
+	err = __alloc_ldt(pc, li->cpt_size/LDT_ENTRY_SIZE);
+	if (err)
+		return err;
+
+	total = pc->size*LDT_ENTRY_SIZE;
+	for (off = 0; off < total; off += PAGE_SIZE) {
+		struct page *pg = pc->ldt_pages[off / PAGE_SIZE];
+		int chunk = total - off;
+
+		if (chunk > PAGE_SIZE)
+			chunk = PAGE_SIZE;
+		err = ctx->pread(kmap(pg), chunk, ctx, data + off);
+		kunmap(pg);
+		if (err)
+			return err;
+	}
+
+	load_LDT(pc);
+	return 0;
+}
+
+#else
+
+/* Restore the LDT of current->mm from the CPT_OBJ_BITS object at @pos: read it
+ * into a new table, install and load that, then free the previous table. */
+static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx)
+{
+	struct mm_struct *mm = current->mm;
+	int oldsize = mm->context.size;
+	int big = li->cpt_size > PAGE_SIZE;
+	void *oldldt, *newldt;
+	int err;
+
+	newldt = big ? vmalloc(li->cpt_size) : kmalloc(li->cpt_size, GFP_KERNEL);
+	if (!newldt)
+		return -ENOMEM;
+
+	err = ctx->pread(newldt, li->cpt_size, ctx, pos + li->cpt_hdrlen);
+	if (err) {
+		/* Never installed, so it must be freed here or it leaks. */
+		if (big)
+			vfree(newldt);
+		else
+			kfree(newldt);
+		return err;
+	}
+
+	oldldt = mm->context.ldt;
+	mm->context.ldt = newldt;
+	mm->context.size = li->cpt_size/LDT_ENTRY_SIZE;
+	load_LDT(&mm->context);
+	if (oldsize && oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+		vfree(oldldt);
+	else if (oldsize)
+		kfree(oldldt);
+	return 0;
+}
+#endif
+#endif
+
+/* Rebuild the completion ring of @aio_ctx from @aimg.
+ * The ring geometry is recomputed from aio_ctx->max_reqs and must match the
+ * values stored in the image; the ring pages themselves are the user pages
+ * at aimg->cpt_mmap_base, pinned with get_user_pages().
+ * Returns 0, -EINVAL on a geometry mismatch, -ENOMEM or -EFAULT. */
+static int
+restore_aio_ring(struct kioctx *aio_ctx, struct cpt_aio_ctx_image *aimg)
+{
+	struct aio_ring_info *info = &aio_ctx->ring_info;
+	unsigned nr_events = aio_ctx->max_reqs;
+	unsigned long size;
+	int nr_pages;
+
+	/* We recalculate parameters of the ring exactly like
+	 * fs/aio.c does and then compare calculated values
+	 * with ones, stored in dump. They must be the same. */
+
+	nr_events += 2;
+
+	size = sizeof(struct aio_ring);
+	size += sizeof(struct io_event) * nr_events;
+	nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+	if (nr_pages != aimg->cpt_ring_pages)
+		return -EINVAL;
+
+	info->nr_pages = nr_pages;
+
+	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
+
+	if (nr_events != aimg->cpt_nr)
+		return -EINVAL;
+
+	info->nr = 0;
+	info->ring_pages = info->internal_pages;
+	if (nr_pages > AIO_RING_PAGES) {
+		/* kcalloc() zeroes the array and checks the size for overflow. */
+		info->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+		if (!info->ring_pages)
+			return -ENOMEM;
+	}
+
+	info->mmap_size = nr_pages * PAGE_SIZE;
+
+	/* fs/aio.c mmap()s the ring pages into the process and then pins
+	 * them, so right after the AIO context is created the kernel and
+	 * user space share these pages.  After the first fork() the pages
+	 * are marked COW and the two views may diverge.
+	 *
+	 * That behaviour is not simulated exactly here: the ring simply
+	 * takes whatever pages are mapped in user space now.  Keeping a
+	 * kernel copy of the ring in the AIO context image would be more
+	 * correct.
+	 *
+	 * Known limitation: if the pages were COWed, the ring is
+	 * transferred incorrectly.
+	 */
+	down_read(&current->mm->mmap_sem);
+	info->mmap_base = aimg->cpt_mmap_base;
+	info->nr_pages = get_user_pages(current, current->mm,
+					info->mmap_base, nr_pages,
+					1, 0, info->ring_pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (unlikely(info->nr_pages != nr_pages)) {
+		int i;
+
+		/* Unpin what was pinned and free a separately allocated array. */
+		for (i=0; i<info->nr_pages; i++)
+			put_page(info->ring_pages[i]);
+		if (info->ring_pages && info->ring_pages != info->internal_pages)
+			kfree(info->ring_pages);
+		return -EFAULT;
+	}
+
+	aio_ctx->user_id = info->mmap_base;
+
+	info->nr = nr_events;
+	info->tail = aimg->cpt_tail;
+
+	return 0;
+}
+
+/* Re-create an AIO context in current->mm from the image object at @pos and
+ * link it into mm->ioctx_list.  Returns 0 or a negative error code. */
+static int do_rst_aio(struct cpt_aio_ctx_image *aimg, loff_t pos, cpt_context_t *ctx)
+{
+	int err;
+	struct kioctx *aio_ctx;
+	extern spinlock_t aio_nr_lock;
+
+	aio_ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL);
+	if (!aio_ctx)
+		return -ENOMEM;
+	memset(aio_ctx, 0, sizeof(*aio_ctx));
+	aio_ctx->max_reqs = aimg->cpt_max_reqs;
+
+	if ((err = restore_aio_ring(aio_ctx, aimg)) < 0) {
+		kmem_cache_free(kioctx_cachep, aio_ctx);
+		eprintk_ctx("AIO %Ld restore_aio_ring: %d\n", pos, err);
+		return err;
+	}
+
+	aio_ctx->mm = current->mm;
+	atomic_inc(&aio_ctx->mm->mm_count);
+	atomic_set(&aio_ctx->users, 1);
+	spin_lock_init(&aio_ctx->ctx_lock);
+	spin_lock_init(&aio_ctx->ring_info.ring_lock);
+	init_waitqueue_head(&aio_ctx->wait);
+	INIT_LIST_HEAD(&aio_ctx->active_reqs);
+	INIT_LIST_HEAD(&aio_ctx->run_list);
+	/* Work data is the kioctx, as in fs/aio.c, not the checkpoint ctx. */
+	INIT_WORK(&aio_ctx->wq, aio_kick_handler, aio_ctx);
+
+	spin_lock(&aio_nr_lock);
+	aio_nr += aio_ctx->max_reqs;
+	spin_unlock(&aio_nr_lock);
+	write_lock(&aio_ctx->mm->ioctx_list_lock);
+	aio_ctx->next = aio_ctx->mm->ioctx_list;
+	aio_ctx->mm->ioctx_list = aio_ctx;
+	write_unlock(&aio_ctx->mm->ioctx_list_lock);
+	return 0;
+}
+
+/* Hash entry mapping an anon_vma id from the image to a live anon_vma. */
+struct anonvma_map {
+	struct hlist_node	list;	/* link in a ctx->anonvmas[] bucket */
+	struct anon_vma		*avma;	/* anon_vma registered for this id */
+	__u64			id;	/* cpt_anonvmaid read from the image */
+};
+
+/* Attach the VMA that starts at vmai->cpt_start in @mm to the anon_vma which
+ * the image identifies by vmai->cpt_anonvmaid.  ctx->anonvmas is a hash table
+ * from ids to anon_vmas; an id not found there is recorded together with the
+ * anon_vma the VMA ends up with.  Returns 0 or a negative error code. */
+static int verify_create_anonvma(struct mm_struct *mm,
+				 struct cpt_vma_image *vmai,
+				 cpt_context_t *ctx)
+{
+	struct anon_vma *avma = NULL;
+	struct anon_vma *new_avma;
+	struct vm_area_struct *vma;
+	int h;
+	/* First call: create the empty table.  Later calls: look the id up. */
+	if (!ctx->anonvmas) {
+		if (CPT_ANONVMA_HSIZE*sizeof(struct hlist_head) > PAGE_SIZE)
+			return -EINVAL;
+		if ((ctx->anonvmas = (void*)__get_free_page(GFP_KERNEL)) == NULL)
+			return -ENOMEM;
+		for (h = 0; h < CPT_ANONVMA_HSIZE; h++)
+			INIT_HLIST_HEAD(&ctx->anonvmas[h]);
+	} else {
+		struct anonvma_map *map;
+		struct hlist_node *elem;
+
+		h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS);
+		hlist_for_each_entry(map, elem, &ctx->anonvmas[h], list) {
+			if (map->id == vmai->cpt_anonvmaid) {
+				avma = map->avma;
+				break;
+			}
+		}
+	}
+	/* Find the VMA and check that it matches the image. */
+	down_read(&mm->mmap_sem);
+	if ((vma = find_vma(mm, vmai->cpt_start)) == NULL) {
+		up_read(&mm->mmap_sem);
+		return -ESRCH;
+	}
+	if (vma->vm_start != vmai->cpt_start) {
+		up_read(&mm->mmap_sem);
+		eprintk_ctx("vma start mismatch\n");
+		return -EINVAL;
+	}
+	if (vma->vm_pgoff != vmai->cpt_pgoff) {
+		dprintk_ctx("vma pgoff mismatch, fixing\n");
+		if (vma->vm_file || (vma->vm_flags&(VM_SHARED|VM_MAYSHARE))) {
+			eprintk_ctx("cannot fixup vma pgoff\n");
+			up_read(&mm->mmap_sem);
+			return -EINVAL;
+		}
+		vma->vm_pgoff = vmai->cpt_pgoff;
+	}
+
+	if (!vma->anon_vma) {
+		if (avma) {
+			vma->anon_vma = avma;
+			anon_vma_link(vma);
+		} else {
+			int err;
+			err = anon_vma_prepare(vma);
+			if (err) {
+				up_read(&mm->mmap_sem);
+				return err;
+			}
+		}
+	} else {
+		/* Note, we _can_ arrive to the situation, when two
+		 * different anonvmaid's point to one anon_vma, this happens
+		 * f.e. when mmap() merged new area to previous one and
+		 * they will share one anon_vma even if they did not on
+		 * original host.
+		 *
+		 * IT IS OK. To all that I understand, we may merge all
+		 * the anon_vma's and rmap can scan all the huge list of vmas
+		 * searching for page. It is just "suboptimal".
+		 *
+		 * Real disaster would happen, if vma already got an anon_vma
+		 * with different id. It is very rare case, kernel does the
+		 * best efforts to merge anon_vmas when some attributes are
+		 * different. In this case we will fall to copying memory.
+		 */
+		if (avma && vma->anon_vma != avma) {
+			up_read(&mm->mmap_sem);
+			wprintk_ctx("anon_vma mismatch\n");
+			return 0;
+		}
+	}
+
+	new_avma = vma->anon_vma;
+	up_read(&mm->mmap_sem);
+	/* Id was not in the table: record which anon_vma it now maps to. */
+	if (!avma) {
+		struct anonvma_map *map;
+		if (!new_avma)
+			return -EINVAL;
+		if ((map = kmalloc(sizeof(*map), GFP_KERNEL)) == NULL)
+			return -ENOMEM;
+
+		map->id = vmai->cpt_anonvmaid;
+		map->avma = new_avma;
+		h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS);
+		hlist_add_head(&map->list, &ctx->anonvmas[h]);
+	}
+	return 0;
+}
+
+/* Copy the pages in [start, end) from the address space @src into the same
+ * addresses of current->mm, one page at a time.  Destination pages are
+ * requested with write=1 and force=1 and are marked dirty after the copy.
+ * Returns 0, or a negative error code if a page cannot be obtained. */
+static int copy_mm_pages(struct mm_struct *src, unsigned long start,
+			 unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		struct page *dst_page, *src_page;
+		void *to, *from;
+		int got;
+
+		got = get_user_pages(current, current->mm,
+				     addr, 1, 1, 1, &dst_page, NULL);
+		if (got <= 0)
+			return got ? got : -EFAULT;
+
+		got = get_user_pages(current, src,
+				     addr, 1, 0, 1, &src_page, NULL);
+		if (got <= 0) {
+			page_cache_release(dst_page);
+			return got ? got : -EFAULT;
+		}
+
+		from = kmap(src_page);
+		to = kmap(dst_page);
+		memcpy(to, from, PAGE_SIZE);
+		set_page_dirty_lock(dst_page);
+		kunmap(dst_page);
+		kunmap(src_page);
+		page_cache_release(dst_page);
+		page_cache_release(src_page);
+	}
+
+	return 0;
+}
+
+/* Recreate one VMA described by @vmai (image object at @vmapos) in current->mm:
+ * mmap() it at the recorded address, bind its anon_vma, replay the page blocks
+ * that follow the header and finally reconcile vm_flags with the image.
+ * @mmpos is unused here.  Returns 0 or a negative errno; the reference taken
+ * on the backing file, if any, is dropped before returning. */
+static int do_rst_vma(struct cpt_vma_image *vmai, loff_t vmapos, loff_t mmpos, struct cpt_context *ctx)
+{
+	int err = 0;
+	unsigned long addr;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct file *file = NULL;
+	unsigned long prot = make_prot(vmai);
+	int checked = 0;
+
+	if (vmai->cpt_file != CPT_NULL) {
+		if (vmai->cpt_type == CPT_VMA_TYPE_0) {
+			file = rst_file(vmai->cpt_file, -1, ctx);
+			if (IS_ERR(file)) {
+				eprintk_ctx("do_rst_vma: rst_file: %Ld\n", (unsigned long long)vmai->cpt_file);
+				return PTR_ERR(file);
+			}
+		} else if (vmai->cpt_type == CPT_VMA_TYPE_SHM) {
+			file = rst_sysv_shm(vmai->cpt_file, ctx);
+			if (IS_ERR(file))
+				return PTR_ERR(file);
+		}
+	}
+
+	down_write(&mm->mmap_sem);
+	addr = do_mmap_pgoff(file, vmai->cpt_start,
+			     vmai->cpt_end-vmai->cpt_start,
+			     prot, make_flags(vmai), vmai->cpt_pgoff);
+
+	if (addr != vmai->cpt_start) {
+		up_write(&mm->mmap_sem);
+		err = -EINVAL;
+		if (IS_ERR((void*)addr))
+			err = addr;
+		goto out;
+	}
+
+	vma = find_vma(mm, vmai->cpt_start);
+	if (vma == NULL) {
+		up_write(&mm->mmap_sem);
+		eprintk_ctx("cannot find mmapped vma\n");
+		err = -ESRCH;
+		goto out;
+	}
+
+	/* do_mmap_pgoff() can merge new area to previous one (not to the next,
+	 * we mmap in order, the rest of mm is still unmapped). This can happen
+	 * f.e. if flags are to be adjusted later, or if we had different
+	 * anon_vma on two adjacent regions. Split it by brute force. */
+	if (vma->vm_start != vmai->cpt_start) {
+		dprintk_ctx("vma %Ld merged, split\n", vmapos);
+		err = split_vma(mm, vma, (unsigned long)vmai->cpt_start, 0);
+		if (err) {
+			up_write(&mm->mmap_sem);
+			eprintk_ctx("cannot split vma\n");
+			goto out;
+		}
+	}
+	up_write(&mm->mmap_sem);
+
+	if (vmai->cpt_anonvma && vmai->cpt_anonvmaid) {
+		err = verify_create_anonvma(mm, vmai, ctx);
+		if (err) {
+			eprintk_ctx("cannot verify_create_anonvma %Ld\n", vmapos);
+			goto out;
+		}
+	}
+
+	if (vmai->cpt_next > vmai->cpt_hdrlen) {
+		loff_t offset = vmapos + vmai->cpt_hdrlen;
+
+		do {
+			union {
+				struct cpt_page_block pb;
+				struct cpt_remappage_block rpb;
+				struct cpt_copypage_block cpb;
+				struct cpt_lazypage_block lpb;
+				struct cpt_iterpage_block ipb;
+			} u;
+			loff_t pos;
+
+			err = rst_get_object(-1, offset, &u, ctx);
+			if (err) {
+				eprintk_ctx("vma fix object: %d\n", err);
+				goto out;
+			}
+			if (u.rpb.cpt_object == CPT_OBJ_REMAPPAGES) {
+				err = sc_remap_file_pages(u.rpb.cpt_start,
+							  u.rpb.cpt_end-u.rpb.cpt_start,
+							  0, u.rpb.cpt_pgoff, 0);
+				if (err < 0) {
+					eprintk_ctx("remap_file_pages: %d (%08x,%u,%u)\n", err,
+					       (__u32)u.rpb.cpt_start, (__u32)(u.rpb.cpt_end-u.rpb.cpt_start),
+					       (__u32)u.rpb.cpt_pgoff);
+					goto out;
+				}
+				offset += u.rpb.cpt_next;
+				continue;
+			} else if (u.cpb.cpt_object == CPT_OBJ_LAZYPAGES) {
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+				unsigned long ptr = u.lpb.cpt_start;
+
+				down_read(&mm->mmap_sem);
+				if ((vma = find_vma(mm, u.lpb.cpt_start)) == NULL) {
+					up_read(&mm->mmap_sem);
+					eprintk_ctx("lost vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+				err = anon_vma_prepare(vma);
+				if (err) {
+					up_read(&mm->mmap_sem);
+					goto out;
+				}
+				while (ptr < u.lpb.cpt_end) {
+					err = rst_pagein(vma, u.lpb.cpt_index + (ptr-u.lpb.cpt_start)/PAGE_SIZE,
+							 ptr, ctx);
+					if (err)
+						break;
+					ptr += PAGE_SIZE;
+				}
+				up_read(&mm->mmap_sem);
+#else
+				err = -EINVAL;
+#endif
+				if (err)
+					goto out;
+				offset += u.cpb.cpt_next;
+				continue;
+			} else if (u.cpb.cpt_object == CPT_OBJ_COPYPAGES) {
+				struct vm_area_struct *vma, *vma1;
+				struct mm_struct *src;
+				struct anon_vma *src_anon;
+				cpt_object_t *mobj;
+
+				if (!vmai->cpt_anonvmaid) {
+					err = -EINVAL;
+					eprintk_ctx("CPT_OBJ_COPYPAGES in !anonvma\n");
+					goto out;
+				}
+
+				mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, u.cpb.cpt_source, ctx);
+				if (!mobj) {
+					eprintk_ctx("lost mm_struct to clone pages from\n");
+					err = -ESRCH;
+					goto out;
+				}
+				src = mobj->o_obj;
+
+				down_read(&src->mmap_sem);
+				src_anon = NULL;
+				vma1 = find_vma(src, u.cpb.cpt_start);
+				if (vma1)
+					src_anon = vma1->anon_vma;
+				up_read(&src->mmap_sem);
+
+				if (!vma1) {
+					eprintk_ctx("lost src vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+
+				down_read(&mm->mmap_sem);
+				if ((vma = find_vma(mm, u.cpb.cpt_start)) == NULL) {
+					up_read(&mm->mmap_sem);
+					eprintk_ctx("lost vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+
+				if (!src_anon ||
+				    !vma->anon_vma ||
+				    vma->anon_vma != src_anon ||
+				    vma->vm_start - vma1->vm_start !=
+				    (vma->vm_pgoff - vma1->vm_pgoff) << PAGE_SHIFT) {
+					up_read(&mm->mmap_sem);
+					wprintk_ctx("anon_vma mismatch in vm_area_struct %Ld\n", vmapos);
+					err = copy_mm_pages(mobj->o_obj,
+							    u.cpb.cpt_start,
+							    u.cpb.cpt_end);
+				} else {
+					err = __copy_page_range(vma, vma1,
+								u.cpb.cpt_start,
+								u.cpb.cpt_end-u.cpb.cpt_start);
+					up_read(&mm->mmap_sem);
+				}
+				if (err) {
+					eprintk_ctx("clone_page_range: %d (%08x,%u,%ld)\n", err,
+						(__u32)u.cpb.cpt_start, (__u32)(u.cpb.cpt_end-u.cpb.cpt_start),
+						(long)u.cpb.cpt_source);
+					goto out;
+				}
+
+				offset += u.cpb.cpt_next;
+				continue;
+			} else if (u.pb.cpt_object == CPT_OBJ_ITERPAGES ||
+				   u.pb.cpt_object == CPT_OBJ_ITERYOUNGPAGES) {
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+				unsigned long ptr = u.lpb.cpt_start;
+				u64 page_pos[16];
+				pos = offset + sizeof(u.pb);
+				/* page_pos[] lives on the stack: reject blocks
+				 * describing more pages than it can hold. */
+				if (u.lpb.cpt_end < ptr ||
+				    (u.lpb.cpt_end-ptr)/PAGE_SIZE > ARRAY_SIZE(page_pos)) {
+					err = -EINVAL;
+					goto out;
+				}
+				err = ctx->pread(&page_pos,
+						 8*(u.lpb.cpt_end-ptr)/PAGE_SIZE,
+						 ctx, pos);
+				if (err) {
+					eprintk_ctx("Oops\n");
+					goto out;
+				}
+
+				down_read(&mm->mmap_sem);
+				if ((vma = find_vma(mm, u.lpb.cpt_start)) == NULL) {
+					up_read(&mm->mmap_sem);
+					eprintk_ctx("lost vm_area_struct\n");
+					err = -ESRCH;
+					goto out;
+				}
+				err = anon_vma_prepare(vma);
+				if (err) {
+					up_read(&mm->mmap_sem);
+					goto out;
+				}
+				while (ptr < u.lpb.cpt_end) {
+					err = rst_iter(vma,
+						       page_pos[(ptr-u.lpb.cpt_start)/PAGE_SIZE],
+						       ptr, ctx);
+					if (err)
+						break;
+					ptr += PAGE_SIZE;
+				}
+				if (u.pb.cpt_object == CPT_OBJ_ITERYOUNGPAGES)
+					make_pages_present((unsigned long)u.lpb.cpt_start,
+							   (unsigned long)u.lpb.cpt_end);
+				up_read(&mm->mmap_sem);
+#else
+				err = -EINVAL;
+#endif
+				if (err)
+					goto out;
+				offset += u.cpb.cpt_next;
+				continue;
+			}
+			if (u.pb.cpt_object != CPT_OBJ_PAGES) {
+				eprintk_ctx("unknown vma fix object %d\n", u.pb.cpt_object);
+				err = -EINVAL;
+				goto out;
+			}
+			pos = offset + sizeof(u.pb);
+			if (!(vmai->cpt_flags&VM_ACCOUNT) && !(prot&PROT_WRITE)) {
+				/* I guess this is get_user_pages() messed things,
+				 * this happens f.e. when gdb inserts breakpoints.
+				 */
+				int i;
+				for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/PAGE_SIZE; i++) {
+					struct page *page;
+					void *maddr;
+					err = get_user_pages(current, current->mm,
+							     (unsigned long)u.pb.cpt_start + i*PAGE_SIZE,
+							     1, 1, 1, &page, NULL);
+					if (err == 0)
+						err = -EFAULT;
+					if (err < 0) {
+						eprintk_ctx("get_user_pages: %d\n", err);
+						goto out;
+					}
+					err = 0;
+					maddr = kmap(page);
+					if (u.pb.cpt_content == CPT_CONTENT_VOID)
+						memset(maddr, 0, PAGE_SIZE);
+					else if (u.pb.cpt_content == CPT_CONTENT_DATA)
+						err = ctx->pread(maddr, PAGE_SIZE,
+								 ctx, pos + i*PAGE_SIZE);
+					else
+						err = -EINVAL;
+					if (!err)
+						set_page_dirty_lock(page);
+					kunmap(page);
+					/* Drop the get_user_pages() reference on
+					 * failure too, otherwise the page leaks. */
+					page_cache_release(page);
+					if (err)
+						goto out;
+				}
+			} else {
+				if (!(prot&PROT_WRITE))
+					sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE);
+				if (u.pb.cpt_content == CPT_CONTENT_VOID) {
+					int i;
+					for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/sizeof(unsigned long); i++) {
+						err = __put_user(0UL, ((unsigned long __user*)(unsigned long)u.pb.cpt_start) + i);
+						if (err) {
+							eprintk_ctx("__put_user 2 %d\n", err);
+							goto out;
+						}
+					}
+				} else if (u.pb.cpt_content == CPT_CONTENT_DATA) {
+					loff_t tpos = pos;
+					err = ctx->file->f_op->read(ctx->file, cpt_ptr_import(u.pb.cpt_start),
+							 u.pb.cpt_end-u.pb.cpt_start, &tpos);
+					if (err != u.pb.cpt_end-u.pb.cpt_start) {
+						if (err >= 0)
+							err = -EIO;
+						goto out;
+					}
+				} else {
+					err = -EINVAL;
+					goto out;
+				}
+				if (!(prot&PROT_WRITE))
+					sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot);
+			}
+			err = 0;
+			offset += u.pb.cpt_next;
+		} while (offset < vmapos + vmai->cpt_next);
+	}
+
+check:
+	do {
+		struct vm_area_struct *vma;
+		down_read(&mm->mmap_sem);
+		vma = find_vma(mm, addr);
+		if (vma) {
+			if ((vma->vm_flags^vmai->cpt_flags)&VM_READHINTMASK) {
+				VM_ClearReadHint(vma);
+				vma->vm_flags |= vmai->cpt_flags&VM_READHINTMASK;
+			}
+			if ((vma->vm_flags^vmai->cpt_flags)&VM_LOCKED) {
+				dprintk_ctx("fixing up VM_LOCKED %Ld\n", vmapos);
+				up_read(&mm->mmap_sem);
+				if (vma->vm_flags&VM_LOCKED)
+					err = sc_munlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start);
+				else
+					err = sc_mlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start);
+				/* When mlock fails with EFAULT, it means
+				 * that it could not bring in pages.
+				 * It can happen after mlock() on unreadable
+				 * VMAs. But VMA is correctly locked,
+				 * so that this error can be ignored. */
+				if (err == -EFAULT)
+					err = 0;
+				if (err)
+					goto out;
+				goto check;
+			}
+			if ((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&~__PAGE_NX)
+				wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos,
+					    (unsigned long long)vma->vm_page_prot.pgprot,
+					    (unsigned long long)vmai->cpt_pgprot);
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
+			if (((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&__PAGE_NX) &&
+			    (ctx->kernel_config_flags&CPT_KERNEL_CONFIG_PAE))
+				wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos,
+				       (__u64)vma->vm_page_prot.pgprot, (__u64)vmai->cpt_pgprot);
+#endif
+			if (vma->vm_flags != vmai->cpt_flags) {
+				unsigned long x = vma->vm_flags ^ vmai->cpt_flags;
+				if (x & VM_EXEC) {
+					/* Crap. On i386 this is OK.
+					 * It is impossible to make via mmap/mprotect
+					 * exec.c clears VM_EXEC on stack. */
+					vma->vm_flags &= ~VM_EXEC;
+				} else if ((x & VM_ACCOUNT) && !checked) {
+					checked = 1;
+					if (!(prot&PROT_WRITE)) {
+						up_read(&mm->mmap_sem);
+						sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE);
+						sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot);
+						goto check;
+					}
+					wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos,
+					       (__u32)vma->vm_flags, (__u32)vmai->cpt_flags);
+				} else {
+					wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos,
+					       (__u32)vma->vm_flags, (__u32)vmai->cpt_flags);
+				}
+			}
+		} else {
+			wprintk_ctx("no VMA for %08lx@%ld\n", addr, (long)vmapos);
+		}
+		up_read(&mm->mmap_sem);
+	} while (0);
+
+out:
+	if (file)
+		fput(file);
+	return err;
+}
+
+/* First address unmapped by do_rst_mm(). IA64 keeps page 0: it is a special
+ * VM_IO|VM_RESERVED mapping speeding up speculative NULL dereferences. */
+#ifdef CONFIG_IA64
+#define TASK_UNMAP_START	PAGE_SIZE
+#else
+#define TASK_UNMAP_START	0
+#endif
+
+/* Rebuild current->mm from the mm image @vmi located at @pos: unmap the
+ * existing address space, restore the layout fields, then replay the child
+ * objects stored after the header (VMAs, LDT bits on x86, AIO contexts).
+ * Returns 0 or a negative error code. */
+static int do_rst_mm(struct cpt_mm_image *vmi, loff_t pos, struct cpt_context *ctx)
+{
+	int err = 0;
+	unsigned int def_flags;
+	struct mm_struct *mm = current->mm;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *bc;
+#endif
+	/* Drop everything mapped from TASK_UNMAP_START up to TASK_SIZE. */
+	down_write(&mm->mmap_sem);
+	do_munmap(mm, TASK_UNMAP_START, TASK_SIZE-TASK_UNMAP_START);
+
+#ifdef CONFIG_USER_RESOURCE
+	/*
+	 * MM beancounter is usually correct from the fork time,
+	 * but not for init, for example.
+	 * Luckily, mm_ub can be changed for a completely empty MM.
+	 */
+	bc = rst_lookup_ubc(vmi->cpt_mmub, ctx);
+	err = virtinfo_notifier_call(VITYPE_SCP, VIRTINFO_SCP_RSTMM, bc);
+	if (err & NOTIFY_FAIL) {
+		up_write(&mm->mmap_sem);
+		return -ECHRNG;
+	}
+	if ((err & VIRTNOTIFY_CHANGE) && bc != mm->mm_ub) {
+		struct user_beancounter *old_bc;
+		old_bc = mm->mm_ub;
+		mm->mm_ub = bc;
+		bc = old_bc;
+	}
+	err = 0;
+	put_beancounter(bc);
+#endif
+
+	mm->start_code = vmi->cpt_start_code;
+	mm->end_code = vmi->cpt_end_code;
+	mm->start_data = vmi->cpt_start_data;
+	mm->end_data = vmi->cpt_end_data;
+	mm->start_brk = vmi->cpt_start_brk;
+	mm->brk = vmi->cpt_brk;
+	mm->start_stack = vmi->cpt_start_stack;
+	mm->arg_start = vmi->cpt_start_arg;
+	mm->arg_end = vmi->cpt_end_arg;
+	mm->env_start = vmi->cpt_start_env;
+	mm->env_end = vmi->cpt_end_env;
+	mm->def_flags = 0;
+	def_flags = vmi->cpt_def_flags;
+	mm->dumpable = vmi->cpt_dumpable;
+	mm->vps_dumpable = vmi->cpt_vps_dumpable;
+#if 0 /* def CONFIG_HUGETLB_PAGE*/
+/* NB: ? */
+	int used_hugetlb;
+#endif
+	up_write(&mm->mmap_sem);
+	/* Replay the objects stored after the mm header, one per iteration. */
+	if (vmi->cpt_next > vmi->cpt_hdrlen) {
+		loff_t offset = pos + vmi->cpt_hdrlen;
+		do {
+			union {
+				struct cpt_vma_image vmai;
+				struct cpt_aio_ctx_image aioi;
+				struct cpt_obj_bits bits;
+			} u;
+			err = rst_get_object(-1, offset, &u, ctx);
+			if (err)
+				goto out;
+			if (u.vmai.cpt_object == CPT_OBJ_VMA) {
+#ifdef CONFIG_IA64
+				//// Later...
+				if (u.vmai.cpt_start)
+#endif			
+				err = do_rst_vma(&u.vmai, offset, pos, ctx);
+				if (err)
+					goto out;
+#ifdef CONFIG_X86
+			} else if (u.bits.cpt_object == CPT_OBJ_BITS &&
+				   u.bits.cpt_content == CPT_CONTENT_MM_CONTEXT) {
+				err = do_rst_ldt(&u.bits, offset, ctx);
+				if (err)
+					goto out;
+#endif
+			} else if (u.aioi.cpt_object == CPT_OBJ_AIO_CONTEXT) {
+				err = do_rst_aio(&u.aioi, offset, ctx);
+				if (err)
+					goto out;
+			} else {
+				eprintk_ctx("unknown object %u in mm image\n", u.vmai.cpt_object);
+				err = -EINVAL;
+				goto out;
+			}
+			offset += u.vmai.cpt_next;
+		} while (offset < pos + vmi->cpt_next);
+	}
+	/* def_flags stayed 0 during the replay above; install the saved value. */
+	down_write(&mm->mmap_sem);
+	mm->def_flags = def_flags;
+	up_write(&mm->mmap_sem);
+
+out:
+	return err;
+}
+
+extern void exit_mm(struct task_struct * tsk);
+
+/* Finish the mm of the current task: drop it when the image has none, keep it
+ * when that mm object is already registered, else rebuild and register it. */
+int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct cpt_mm_image *vmi = (void *)__get_free_page(GFP_KERNEL);
+	cpt_object_t *mobj;
+	int err = 0;
+
+	if (vmi == NULL)
+		return -ENOMEM;
+	if (ti->cpt_mm == CPT_NULL) {
+		if (current->mm) {
+			virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_EXIT,
+					current);
+			exit_mm(current);
+		}
+		goto out;
+	}
+
+	mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx);
+	if (mobj != NULL) {
+		BUG_ON(current->mm != mobj->o_obj);
+		goto out;
+	}
+
+	if (current->mm == NULL) {
+		struct mm_struct *new_mm = mm_alloc();
+		if (new_mm == NULL) {
+			err = -ENOMEM;
+			goto out;
+		}
+		err = init_new_context(current, new_mm);
+		if (err) {
+			mmdrop(new_mm);
+			goto out;
+		}
+		current->mm = new_mm;
+	}
+	err = rst_get_object(CPT_OBJ_MM, ti->cpt_mm, vmi, ctx);
+	if (err)
+		goto out;
+	err = do_rst_mm(vmi, ti->cpt_mm, ctx);
+	if (err) {
+		eprintk_ctx("do_rst_mm %Ld\n", (unsigned long long)ti->cpt_mm);
+		goto out;
+	}
+	mobj = cpt_object_add(CPT_OBJ_MM, current->mm, ctx);
+	if (mobj == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+	cpt_obj_setpos(mobj, ti->cpt_mm, ctx);
+
+out:
+	free_page((unsigned long)vmi);
+	return err;
+}
+
+/* This is the part of mm setup that is done in the parent's context. Mostly,
+ * it is the place where the mm of another process is grafted onto the child.
+ */
+
+int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	task_t *tsk = obj->o_obj;
+	cpt_object_t *mobj;
+
+	/* Task without mm. Just get rid of this. */
+	if (ti->cpt_mm == CPT_NULL) {
+		if (tsk->mm) {
+			virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_EXIT,
+					tsk);
+			mmput(tsk->mm);
+			tsk->mm = NULL;
+		}
+		return 0;
+	}
+
+	mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx);
+	if (mobj) {
+		struct mm_struct *newmm = mobj->o_obj;
+		/* Good, the MM is already created. */
+		if (newmm == tsk->mm)
+			return 0;	/* Already done by clone(). */
+		/* The task may have no mm at this point; mmput(NULL) would oops. */
+		if (tsk->mm)
+			mmput(tsk->mm);
+		atomic_inc(&newmm->mm_users);
+		tsk->mm = newmm;
+		tsk->active_mm = newmm;
+	}
+	return 0;
+}
+
+/* We use CLONE_VM when mm of child is going to be shared with parent.
+ * Otherwise mm is copied.
+ */
+
+__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	/* No CLONE_VM only for an image mm that is not in the object table. */
+	if (ti->cpt_mm != CPT_NULL &&
+	    lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx) == NULL)
+		return 0;
+	return CLONE_VM;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_net.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_net.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_net.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_net.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,498 @@
+/*
+ *
+ *  kernel/cpt/rst_net.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+#include <net/addrconf.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+#include "cpt_net.h"
+
+#include "cpt_syscalls.h"
+
+extern struct in_ifaddr *inet_alloc_ifa(void);
+extern int inet_insert_ifa(struct in_ifaddr *ifa);
+
+/* Re-add every address recorded in the CPT_SECT_NET_IFADDR image section. */
+int rst_restore_ifaddr(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_NET_IFADDR];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_ifaddr_image di;
+	struct net_device *dev;
+
+	if (sec == CPT_NULL)
+		return 0;	/* section absent: nothing to restore */
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_IFADDR || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		err = rst_get_object(CPT_OBJ_NET_IFADDR, sec, &di, ctx);
+		if (err)
+			return err;
+		rtnl_lock();
+		dev = __dev_get_by_index(di.cpt_index);
+		if (dev && di.cpt_family == AF_INET) {
+			struct in_device *in_dev;
+			struct in_ifaddr *ifa;
+			in_dev = __in_dev_get_rtnl(dev);
+			if (!in_dev && !(in_dev = inetdev_init(dev))) {
+				rtnl_unlock();
+				return -ENOMEM;	/* never hand NULL to in_dev_hold() */
+			}
+			ifa = inet_alloc_ifa();
+			if (ifa) {
+				ifa->ifa_local = di.cpt_address[0];
+				ifa->ifa_address = di.cpt_peer[0];
+				ifa->ifa_broadcast = di.cpt_broadcast[0];
+				ifa->ifa_prefixlen = di.cpt_masklen;
+				ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
+				ifa->ifa_flags = di.cpt_flags;
+				ifa->ifa_scope = di.cpt_scope;
+				memcpy(ifa->ifa_label, di.cpt_label, IFNAMSIZ);
+				in_dev_hold(in_dev);
+				ifa->ifa_dev   = in_dev;
+				err = inet_insert_ifa(ifa);
+				if (err && err != -EEXIST) {
+					rtnl_unlock();
+					eprintk_ctx("add ifaddr err %d for %d %s\n", err, di.cpt_index, di.cpt_label);
+					return err;
+				}
+			}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		} else if (dev && di.cpt_family == AF_INET6) {
+			err = inet6_addr_add(dev->ifindex,
+					     (struct in6_addr *)di.cpt_address,
+					     di.cpt_masklen);
+			if (err && err != -EEXIST) {
+				rtnl_unlock();
+				eprintk_ctx("add ifaddr err %d for %d %s\n", err, di.cpt_index, di.cpt_label);
+				return err;
+			}
+#endif
+		} else {
+			rtnl_unlock();
+			eprintk_ctx("unknown ifaddr 2 for %d\n", di.cpt_index);
+			return -EINVAL;
+		}
+		rtnl_unlock();
+		sec += di.cpt_next;
+	}
+	return 0;
+}
+
+/* Classify a saved route: 2 = skip, 1 = RTPROT_KERNEL route, 0 = other. */
+static int rewrite_rtmsg(struct nlmsghdr *nlh, struct cpt_context *ctx)
+{
+	int min_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	__u32 prefix0 = 0;
+
+	if (nlh->nlmsg_len > min_len) {
+		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+		struct rtattr *rta = (void*)nlh + NLMSG_ALIGN(min_len);
+
+		while (RTA_OK(rta, attrlen)) {
+			if (rta->rta_type == RTA_DST &&
+			    RTA_PAYLOAD(rta) >= sizeof(prefix0))	/* no short read */
+				prefix0 = *(__u32*)RTA_DATA(rta);
+			rta = RTA_NEXT(rta, attrlen);
+		}
+	}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	if (rtm->rtm_family == AF_INET6) {
+		if (rtm->rtm_type == RTN_LOCAL ||
+		    (rtm->rtm_flags & RTM_F_CLONED))
+			return 2;
+		if (rtm->rtm_protocol == RTPROT_UNSPEC ||
+		    rtm->rtm_protocol == RTPROT_RA ||
+		    rtm->rtm_protocol == RTPROT_REDIRECT ||
+		    rtm->rtm_protocol == RTPROT_KERNEL)
+			return 2;
+		if (rtm->rtm_protocol == RTPROT_BOOT &&
+		    ((rtm->rtm_dst_len == 8 && prefix0 == htonl(0xFF000000)) ||
+		     (rtm->rtm_dst_len == 64 && prefix0 == htonl(0xFE800000))))
+			return 2;
+	}
+#endif
+	return rtm->rtm_protocol == RTPROT_KERNEL;
+}
+
+/* Replay the saved rtnetlink messages of CPT_SECT_NET_ROUTE on a netlink socket. */
+int rst_restore_route(struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct msghdr msg;
+	struct iovec iov;
+	struct sockaddr_nl nladdr;
+	mm_segment_t oldfs;
+	loff_t sec = ctx->sections[CPT_SECT_NET_ROUTE];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr v;
+	char *pg;
+
+	if (sec == CPT_NULL)
+		return 0;
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_ROUTE || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	if (h.cpt_hdrlen >= h.cpt_next)	/* header only, no payload */
+		return 0;
+
+	sec += h.cpt_hdrlen;
+	err = rst_get_object(CPT_OBJ_NET_ROUTE, sec, &v, ctx);
+	if (err < 0)
+		return err;
+
+	err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock);
+	if (err)
+		return err;
+
+	pg = (char*)__get_free_page(GFP_KERNEL);	/* buffer for one message / one reply */
+	if (pg == NULL) {
+		err = -ENOMEM;
+		goto out_sock;
+	}
+
+	memset(&nladdr, 0, sizeof(nladdr));
+	nladdr.nl_family = AF_NETLINK;
+
+	endsec = sec + v.cpt_next;
+	sec += v.cpt_hdrlen;
+
+	while (sec < endsec) {
+		struct nlmsghdr *n;
+		struct nlmsghdr nh;
+		int kernel_flag;
+
+		if (endsec - sec < sizeof(nh))	/* trailing bytes too short for a header */
+			break;
+
+		err = ctx->pread(&nh, sizeof(nh), ctx, sec);
+		if (err)
+			goto out_sock_pg;
+		if (nh.nlmsg_len < sizeof(nh) || nh.nlmsg_len > PAGE_SIZE ||
+		    endsec - sec < nh.nlmsg_len) {
+			err = -EINVAL;
+			goto out_sock_pg;
+		}
+		err = ctx->pread(pg, nh.nlmsg_len, ctx, sec);
+		if (err)
+			goto out_sock_pg;
+
+		n = (struct nlmsghdr*)pg;
+		n->nlmsg_flags = NLM_F_REQUEST|NLM_F_APPEND|NLM_F_CREATE;	/* saved flags are overridden */
+
+		err = rewrite_rtmsg(n, ctx);
+		if (err < 0)
+			goto out_sock_pg;
+		kernel_flag = err;
+
+		if (kernel_flag == 2)	/* rewrite_rtmsg() says: do not replay */
+			goto do_next;
+
+		iov.iov_base=n;
+		iov.iov_len=nh.nlmsg_len;
+		msg.msg_name=&nladdr;
+		msg.msg_namelen=sizeof(nladdr);
+		msg.msg_iov=&iov;
+		msg.msg_iovlen=1;
+		msg.msg_control=NULL;
+		msg.msg_controllen=0;
+		msg.msg_flags=MSG_DONTWAIT;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_sendmsg(sock, &msg, nh.nlmsg_len);
+		set_fs(oldfs);
+
+		if (err < 0)
+			goto out_sock_pg;
+		err = 0;
+
+		iov.iov_base=pg;	/* the reply, if any, overwrites the request */
+		iov.iov_len=PAGE_SIZE;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT);
+		set_fs(oldfs);
+		if (err != -EAGAIN) {	/* something other than "no reply queued" */
+			if (err == NLMSG_LENGTH(sizeof(struct nlmsgerr)) &&
+			    n->nlmsg_type == NLMSG_ERROR) {
+				struct nlmsgerr *e = NLMSG_DATA(n);
+				if (e->error != -EEXIST || !kernel_flag)	/* -EEXIST is silent when kernel_flag is set */
+					eprintk_ctx("NLMERR: %d\n", e->error);
+			} else {
+				eprintk_ctx("Res: %d %d\n", err, n->nlmsg_type);
+			}
+		}
+do_next:
+		err = 0;	/* a reply is only logged, it never fails the restore */
+		sec += NLMSG_ALIGN(nh.nlmsg_len);
+	}
+
+out_sock_pg:
+	free_page((unsigned long)pg);
+out_sock:
+	sock_release(sock);
+	return err;
+}
+
+/* Clear disable_net on the VE named by ctx->ve_id; -ESRCH if it is not found. */
+int rst_resume_network(struct cpt_context *ctx)
+{
+	struct ve_struct *ve = get_ve_by_id(ctx->ve_id);
+
+	if (ve == NULL)
+		return -ESRCH;
+	ve->disable_net = 0;
+	put_ve(ve);
+	return 0;
+}
+
+/* Look up each saved net device by name and restore its ifindex and flags. */
+int rst_restore_netdev(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_NET_DEVICE];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_netdev_image di;
+	struct net_device *dev;
+
+	get_exec_env()->disable_net = 1;	/* set even when the section is absent */
+
+	if (sec == CPT_NULL)
+		return 0;
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_DEVICE || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int err;	/* shadows the function-level err */
+		struct net_device *dev_new;
+		err = rst_get_object(CPT_OBJ_NET_DEVICE, sec, &di, ctx);
+		if (err)
+			return err;
+		rtnl_lock();
+		dev = __dev_get_by_name(di.cpt_name);
+		if (dev) {
+			if (dev->ifindex != di.cpt_index) {
+				dev_new = __dev_get_by_index(di.cpt_index);
+				if (!dev_new) {	/* saved index is free: rehash dev under it */
+					write_lock_bh(&dev_base_lock);
+					hlist_del(&dev->index_hlist);
+					if (dev->iflink == dev->ifindex)	/* keep a self-referencing iflink in step */
+						dev->iflink = di.cpt_index;
+					dev->ifindex = di.cpt_index;
+					hlist_add_head(&dev->index_hlist,
+							dev_index_hash(dev->ifindex,
+								get_exec_env()));
+					write_unlock_bh(&dev_base_lock);
+				} else {	/* saved index is taken: swap the two indices */
+					write_lock_bh(&dev_base_lock);
+					hlist_del(&dev->index_hlist);
+					hlist_del(&dev_new->index_hlist);
+					if (dev_new->iflink == dev_new->ifindex)
+						dev_new->iflink = dev->ifindex;
+					dev_new->ifindex = dev->ifindex;
+					if (dev->iflink == dev->ifindex)
+						dev->iflink = di.cpt_index;
+					dev->ifindex = di.cpt_index;
+					hlist_add_head(&dev->index_hlist,
+							dev_index_hash(dev->ifindex,
+								get_exec_env()));
+					hlist_add_head(&dev_new->index_hlist,
+							dev_index_hash(dev_new->ifindex,
+								get_exec_env()));
+					write_unlock_bh(&dev_base_lock);
+				}
+			}
+			if (di.cpt_flags^dev->flags) {	/* any flag bit differs */
+				err = dev_change_flags(dev, di.cpt_flags);
+				if (err)	/* logged only, the restore goes on */
+					eprintk_ctx("dev_change_flags err: %d\n", err);
+			}
+		} else {
+			eprintk_ctx("unknown interface 2 %s\n", di.cpt_name);	/* logged, not fatal */
+		}
+		rtnl_unlock();
+		sec += di.cpt_next;
+	}
+	return 0;
+}
+
+/* Thread body: make pfd[0] the standard input and exec iptables-restore. */
+static int dumpfn(void *arg)
+{
+	int *pipefd = arg;
+	char *argv[] = { "iptables-restore", "-c", NULL };
+	int fd, rc;
+
+	if (pipefd[0] != 0)
+		sc_dup2(pipefd[0], 0);
+	/* Close every descriptor except descriptor 0. */
+	for (fd = 1; fd < current->files->fdt->max_fds; fd++)
+		sc_close(fd);
+	module_put(THIS_MODULE);
+
+	set_fs(KERNEL_DS);
+	rc = sc_execve("/sbin/iptables-restore", argv, NULL);
+	if (rc == -ENOENT)
+		rc = sc_execve("/usr/sbin/iptables-restore", argv, NULL);
+	eprintk("failed to exec iptables-restore: %d\n", rc);
+	return 255 << 8;
+}
+
+/* Pipe the saved CPT_SECT_NET_IPTABLES text into an iptables-restore child. */
+static int rst_restore_iptables(struct cpt_context * ctx)
+{
+	int err;
+	int pfd[2];
+	struct file *f;
+	struct cpt_object_hdr v;
+	int n;
+	struct cpt_section_hdr h;
+	loff_t sec = ctx->sections[CPT_SECT_NET_IPTABLES];
+	loff_t end;
+	int pid;
+	int status;
+	mm_segment_t oldfs;
+	sigset_t ignore, blocked;
+
+	if (sec == CPT_NULL)
+		return 0;
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_NET_IPTABLES || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	if (h.cpt_hdrlen == h.cpt_next)
+		return 0;
+	if (h.cpt_hdrlen > h.cpt_next)
+		return -EINVAL;
+	sec += h.cpt_hdrlen;
+	err = rst_get_object(CPT_OBJ_NAME, sec, &v, ctx);
+	if (err < 0)
+		return err;
+
+	err = sc_pipe(pfd);
+	if (err < 0)
+		return err;
+	siginitset(&ignore, CPT_SIG_IGNORE_MASK);	/* also zeroes sig[1..] */
+	sigprocmask(SIG_BLOCK, &ignore, &blocked);
+	pid = err = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0);
+	if (err < 0) {
+		eprintk_ctx("iptables local_kernel_thread: %d\n", err);
+		goto out;
+	}
+	f = fget(pfd[1]);	/* keep the write end as a struct file */
+	sc_close(pfd[1]);
+	sc_close(pfd[0]);
+
+	ctx->file->f_pos = sec + v.cpt_hdrlen;
+	end = sec + v.cpt_next;
+	do {
+		char *p;
+		char buf[16];
+
+		n = end - ctx->file->f_pos;
+		if (n > sizeof(buf))
+			n = sizeof(buf);
+
+		if (ctx->read(buf, n, ctx))
+			break;
+		if ((p = memchr(buf, 0, n)) != NULL)	/* do not forward NUL bytes */
+			n = p - buf;
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		f->f_op->write(f, buf, n, &f->f_pos);
+		set_fs(oldfs);
+	} while (ctx->file->f_pos < end);
+
+	fput(f);	/* last reference to the write end: the child sees EOF */
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if ((err = sc_waitx(pid, 0, &status)) < 0)
+		eprintk_ctx("wait4: %d\n", err);
+	else if ((status & 0x7f) == 0) {	/* child exited normally */
+		err = (status & 0xff00) >> 8;
+		if (err != 0) {
+			eprintk_ctx("iptables-restore exited with %d\n", err);
+			err = -EINVAL;
+		}
+	} else {
+		eprintk_ctx("iptables-restore terminated\n");
+		err = -EINVAL;
+	}
+	set_fs(oldfs);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+
+	return err;
+
+out:
+	if (pfd[1] >= 0)
+		sc_close(pfd[1]);
+	if (pfd[0] >= 0)
+		sc_close(pfd[0]);
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+	return err;
+}
+
+/* Restore devices, addresses, routes, iptables, conntrack; stop on first error. */
+int rst_restore_net(struct cpt_context *ctx)
+{
+	int rc = rst_restore_netdev(ctx);
+
+	if (rc)
+		return rc;
+	if ((rc = rst_restore_ifaddr(ctx)) != 0)
+		return rc;
+	if ((rc = rst_restore_route(ctx)) != 0)
+		return rc;
+	if ((rc = rst_restore_iptables(ctx)) != 0)
+		return rc;
+	return rst_restore_ip_conntrack(ctx);
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_proc.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_proc.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_proc.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_proc.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,580 @@
+/*
+ *
+ *  kernel/cpt/rst_proc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_ioctl.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_dump.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_kernel.h"
+
+MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>");
+MODULE_LICENSE("GPL");
+
+/* List of contexts and lock protecting the list */
+static struct list_head cpt_context_list;
+static spinlock_t cpt_context_lock;
+
+/* read_proc handler: prints a header line and one line per listed context. */
+static int proc_read(char *buffer, char **start, off_t offset,
+		     int length, int *eof, void *data)
+{
+	off_t pos = 0;
+	off_t begin = 0;
+	int len = 0;
+	cpt_context_t *ctx;
+
+	len += sprintf(buffer, "Ctx      Id       VE       State\n");
+	spin_lock(&cpt_context_lock);
+
+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
+		len += sprintf(buffer+len,"%p %08x %-8u %d",
+			       ctx,
+			       ctx->contextid,
+			       ctx->ve_id,
+			       ctx->ctx_state
+			       );
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+		len += pagein_info_printf(buffer+len, ctx);
+#endif
+
+		buffer[len++] = '\n';
+
+		pos = begin+len;
+		if (pos < offset) {	/* all output so far lies before offset: drop it */
+			len = 0;
+			begin = pos;
+		}
+		if (pos > offset+length)	/* requested window is covered */
+			goto done;
+	}
+	*eof = 1;	/* the whole list was walked */
+
+done:
+	spin_unlock(&cpt_context_lock);
+	*start = buffer + (offset - begin);
+	len -= (offset - begin);
+	if(len > length)
+		len = length;
+	if(len < 0)
+		len = 0;
+	return len;
+}
+
+/* Unlink and free ctx; cpt_context_lock is dropped on entry and retaken on exit. */
+void rst_context_release(cpt_context_t *ctx)
+{
+	list_del(&ctx->ctx_list);
+	spin_unlock(&cpt_context_lock);
+
+	if (ctx->ctx_state > 0)
+		rst_resume(ctx);
+	ctx->ctx_state = CPT_CTX_ERROR;
+	rst_close_dumpfile(ctx);
+
+	if (ctx->anonvmas) {
+		int h;
+		for (h = 0; h < CPT_ANONVMA_HSIZE; h++) {
+			while (!hlist_empty(&ctx->anonvmas[h])) {
+				struct hlist_node *elem = ctx->anonvmas[h].first;
+				hlist_del(elem);
+				kfree(elem);	/* NOTE(review): assumes the node is the first member of its entry - confirm */
+			}
+		}
+		free_page((unsigned long)ctx->anonvmas);
+	}
+	cpt_flush_error(ctx);	/* flush before errorfile and error_msg go away */
+	if (ctx->errorfile) {
+		fput(ctx->errorfile);
+		ctx->errorfile = NULL;
+	}
+	if (ctx->error_msg) {
+		free_page((unsigned long)ctx->error_msg);
+		ctx->error_msg = NULL;
+	}
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+	rst_drop_iter_dir(ctx);
+#endif
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if (ctx->pagein_file_out)
+		fput(ctx->pagein_file_out);
+	if (ctx->pagein_file_in)
+		fput(ctx->pagein_file_in);
+	if (ctx->pgin_task)
+		put_task_struct(ctx->pgin_task);
+#endif
+	if (ctx->filejob_queue)
+		rst_flush_filejobs(ctx);
+	if (ctx->objcount)
+		eprintk_ctx("%d objects leaked\n", ctx->objcount);
+	kfree(ctx);
+
+	spin_lock(&cpt_context_lock);	/* callers expect the lock held on return */
+}
+
+static void __cpt_context_put(cpt_context_t *ctx)
+{
+	if (--ctx->refcount == 0)	/* caller holds cpt_context_lock */
+		rst_context_release(ctx);
+}
+
+static void cpt_context_put(cpt_context_t *ctx)
+{
+	spin_lock(&cpt_context_lock);	/* locked wrapper around __cpt_context_put() */
+	__cpt_context_put(ctx);
+	spin_unlock(&cpt_context_lock);
+}
+
+/* Allocate a context, initialise it and append it to cpt_context_list. */
+cpt_context_t * rst_context_open(void)
+{
+	cpt_context_t *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (ctx == NULL)
+		return NULL;
+	rst_context_init(ctx);
+	spin_lock(&cpt_context_lock);
+	list_add_tail(&ctx->ctx_list, &cpt_context_list);
+	spin_unlock(&cpt_context_lock);
+	if ((ctx->error_msg = (char*)__get_free_page(GFP_KERNEL)) != NULL)
+		ctx->error_msg[0] = 0;
+	return ctx;
+}
+
+void rst_report_error(int err, cpt_context_t *ctx)
+{
+	struct file *sf = ctx->statusfile;
+	int status = 7 /* VZ_ENVCREATE_ERROR */;
+	mm_segment_t oldfs;
+
+	if (sf == NULL)
+		return;		/* no status file attached: nothing to report to */
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	if (sf->f_op && sf->f_op->write)
+		sf->f_op->write(sf, (char*)&status, sizeof(status), &sf->f_pos);
+	set_fs(oldfs);
+	fput(sf);		/* the status file is used once, then dropped */
+	ctx->statusfile = NULL;
+}
+
+static cpt_context_t * cpt_context_lookup(unsigned int ctxid)
+{
+	cpt_context_t *pos, *found = NULL;
+
+	spin_lock(&cpt_context_lock);
+	list_for_each_entry(pos, &cpt_context_list, ctx_list) {
+		if (pos->contextid != ctxid)
+			continue;
+		pos->refcount++;	/* this reference goes to the caller */
+		found = pos;
+		break;
+	}
+	spin_unlock(&cpt_context_lock);
+	return found;
+}
+
+/* ioctl handler of the rst proc file: manages contexts and drives the restore. */
+static int rst_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	cpt_context_t *ctx;
+	struct file *dfile = NULL;
+
+	unlock_kernel();
+	if (cmd == CPT_TEST_CAPS) {
+		err = test_cpu_caps();
+		goto out_lock;
+	}
+
+	if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) {
+		cpt_context_t *old_ctx;
+
+		ctx = NULL;
+		if (cmd == CPT_JOIN_CONTEXT) {
+			err = -ENOENT;
+			ctx = cpt_context_lookup(arg);
+			if (!ctx)
+				goto out_lock;
+		}
+
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		file->private_data = ctx;
+
+		if (old_ctx) {
+			if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) {
+				old_ctx->sticky = 0;
+				old_ctx->refcount--;
+			}
+			__cpt_context_put(old_ctx);
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_lock;
+	}
+
+	spin_lock(&cpt_context_lock);
+	ctx = (cpt_context_t*)file->private_data;
+	if (ctx)
+		ctx->refcount++;
+	spin_unlock(&cpt_context_lock);
+
+	if (!ctx) {
+		cpt_context_t *old_ctx;
+
+		err = -ENOMEM;
+		ctx = rst_context_open();
+		if (!ctx)
+			goto out_lock;
+
+		/* Somebody may have attached a context while we allocated. */
+		spin_lock(&cpt_context_lock);
+		old_ctx = (cpt_context_t*)file->private_data;
+		if (!old_ctx) {
+			ctx->refcount++;
+			file->private_data = ctx;
+		} else {
+			old_ctx->refcount++;
+		}
+		if (old_ctx) {
+			__cpt_context_put(ctx);
+			ctx = old_ctx;
+		}
+		spin_unlock(&cpt_context_lock);
+	}
+
+	if (cmd == CPT_GET_CONTEXT) {
+		unsigned int contextid = (unsigned int)arg;
+
+		err = -EINVAL;
+		if (ctx->contextid && ctx->contextid != contextid)
+			goto out_nosem;
+		if (!ctx->contextid) {
+			cpt_context_t *c1 = cpt_context_lookup(contextid);
+			if (c1) {
+				cpt_context_put(c1);
+				err = -EEXIST;
+				goto out_nosem;
+			}
+			ctx->contextid = contextid;
+		}
+		spin_lock(&cpt_context_lock);
+		if (!ctx->sticky) {
+			ctx->sticky = 1;
+			ctx->refcount++;
+		}
+		spin_unlock(&cpt_context_lock);
+		err = 0;
+		goto out_nosem;
+	}
+
+	down(&ctx->main_sem);
+
+	err = -EBUSY;
+	if (ctx->ctx_state < 0)
+		goto out;
+
+	err = 0;
+	switch (cmd) {
+	case CPT_SET_DUMPFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {	/* arg is unsigned: a negative fd detaches */
+			err = -EBADF;
+			dfile = fget(arg);
+			if (dfile == NULL)
+				break;
+			if (dfile->f_op == NULL ||
+			    dfile->f_op->read == NULL) {
+				fput(dfile);
+				break;
+			}
+			err = 0;
+		}
+		if (ctx->file)
+			fput(ctx->file);
+		ctx->file = dfile;
+		break;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	case CPT_SET_PAGEINFDIN:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_in)
+			fput(ctx->pagein_file_in);
+		ctx->pagein_file_in = dfile;
+		break;
+	case CPT_SET_PAGEINFDOUT:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->pagein_file_out)
+			fput(ctx->pagein_file_out);
+		ctx->pagein_file_out = dfile;
+		break;
+	case CPT_PAGEIND:
+		err = rst_pageind(ctx);
+		break;
+#endif
+#ifdef CONFIG_VZ_CHECKPOINT_ITER
+	case CPT_ITER:
+		err = rst_iteration(ctx);
+		break;
+#endif
+	case CPT_SET_LOCKFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->lockfile)
+			fput(ctx->lockfile);
+		ctx->lockfile = dfile;
+		break;
+	case CPT_SET_STATUSFD:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		if ((int)arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->statusfile)
+			fput(ctx->statusfile);
+		ctx->statusfile = dfile;
+		break;
+	case CPT_SET_ERRORFD:
+		if ((int)arg >= 0) {
+			dfile = fget(arg);
+			if (dfile == NULL) {
+				err = -EBADF;
+				break;
+			}
+		}
+		if (ctx->errorfile)
+			fput(ctx->errorfile);
+		ctx->errorfile = dfile;
+		break;
+	case CPT_SET_VEID:
+		if (ctx->ctx_state > 0) {
+			err = -EBUSY;
+			break;
+		}
+		ctx->ve_id = arg;
+		break;
+	case CPT_UNDUMP:
+		if (ctx->ctx_state > 0) {
+			err = -ENOENT;
+			break;
+		}
+		ctx->ctx_state = CPT_CTX_UNDUMPING;
+		err = vps_rst_undump(ctx);
+		if (err) {
+			rst_report_error(err, ctx);
+			if (rst_kill(ctx) == 0)
+				ctx->ctx_state = CPT_CTX_IDLE;
+		} else {
+			ctx->ctx_state = CPT_CTX_UNDUMPED;
+		}
+		break;
+	case CPT_RESUME:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		err = rst_resume(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	case CPT_KILL:
+		if (!ctx->ctx_state) {
+			err = -ENOENT;
+			break;
+		}
+		err = rst_kill(ctx);
+		if (!err)
+			ctx->ctx_state = CPT_CTX_IDLE;
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+out:
+	cpt_flush_error(ctx);
+	up(&ctx->main_sem);
+out_nosem:
+	cpt_context_put(ctx);
+out_lock:
+	lock_kernel();
+	/* Restart codes must not reach user space: report -EINTR instead. */
+	if (err == -ERESTARTSYS || err == -ERESTARTNOINTR ||
+	    err == -ERESTARTNOHAND || err == -ERESTART_RESTARTBLOCK)
+		err = -EINTR;
+	return err;
+}
+
+/* Take a module reference for as long as the file stays open. */
+static int rst_open(struct inode * inode, struct file * file)
+{
+	int pinned = try_module_get(THIS_MODULE);
+
+	return pinned ? 0 : -EBUSY;
+}
+
+/* Drop the context reference held by this file, then the module reference. */
+static int rst_release(struct inode * inode, struct file * file)
+{
+	cpt_context_t *owned;
+
+	spin_lock(&cpt_context_lock);
+	owned = (cpt_context_t*)file->private_data;
+	file->private_data = NULL;
+	if (owned != NULL)
+		__cpt_context_put(owned);
+	spin_unlock(&cpt_context_lock);
+
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+static struct file_operations rst_fops =
+{
+	.owner		= THIS_MODULE,
+	.ioctl		= rst_ioctl,	/* every control command goes through here */
+	.open		= rst_open,
+	.release	= rst_release,
+};	/* .read, .write and .llseek are filled in by init_rst() */
+
+
+static struct proc_dir_entry *proc_ent;
+extern void *schedule_tail_p;
+extern void schedule_tail_hook(void);
+
+static struct ctl_table_header *ctl_header;
+
+static ctl_table debug_table[] = {
+	{
+		.ctl_name	= 9476,
+		.procname	= "rst",
+		.data		= &debug_level,	/* the integer read and written via this entry */
+		.maxlen		= sizeof(debug_level),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }	/* terminating entry */
+};
+static ctl_table root_table[] = {
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table,	/* "rst" is a child of "debug" */
+	},
+	{ .ctl_name = 0 }	/* terminating entry */
+};
+
+/*
+ * Module init: register the sysctl table, set up the context list and
+ * create the "rst" proc entry whose file operations are rst_fops.
+ */
+static int __init init_rst(void)
+{
+	ctl_header = register_sysctl_table(root_table, 0);
+	if (ctl_header == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&cpt_context_lock);
+	INIT_LIST_HEAD(&cpt_context_list);
+
+	proc_ent = create_proc_entry_mod("rst", 0600, NULL, THIS_MODULE);
+	if (proc_ent == NULL) {
+		/* Roll back the sysctl registration done above. */
+		unregister_sysctl_table(ctl_header);
+		return -EINVAL;
+	}
+
+	/* Keep the entry's own read/write/llseek, then install rst_fops. */
+	rst_fops.read = proc_ent->proc_fops->read;
+	rst_fops.write = proc_ent->proc_fops->write;
+	rst_fops.llseek = proc_ent->proc_fops->llseek;
+	proc_ent->proc_fops = &rst_fops;
+
+	/* proc_read() is installed as the entry's read_proc callback. */
+	proc_ent->read_proc = proc_read;
+	proc_ent->data = NULL;
+	proc_ent->owner = THIS_MODULE;
+	return 0;
+}
+module_init(init_rst);
+
+/* Module exit: remove the proc and sysctl entries, then free every context. */
+static void __exit exit_rst(void)
+{
+	cpt_context_t *ctx;
+
+	remove_proc_entry("rst", NULL);
+	unregister_sysctl_table(ctl_header);
+
+	spin_lock(&cpt_context_lock);
+	while (!list_empty(&cpt_context_list)) {
+		ctx = list_entry(cpt_context_list.next, cpt_context_t, ctx_list);
+		/* Exactly one reference must remain: the sticky one or ours. */
+		if (!ctx->sticky)
+			ctx->refcount++;
+		ctx->sticky = 0;
+		BUG_ON(ctx->refcount != 1);
+		__cpt_context_put(ctx);
+	}
+	spin_unlock(&cpt_context_lock);
+}
+module_exit(exit_rst);
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_process.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_process.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_process.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_process.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,1548 @@
+/*
+ *
+ *  kernel/cpt/rst_process.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <linux/kmem_cache.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/ptrace.h>
+#include <linux/tty.h>
+#ifdef CONFIG_X86
+#include <asm/desc.h>
+#endif
+#include <asm/unistd.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_misc.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_ubc.h"
+#include "cpt_process.h"
+#include "cpt_kernel.h"
+
+
+#define HOOK_RESERVE	256
+
+struct resume_info
+{
+	void (*hook)(struct resume_info *);	/* callback that receives this structure */
+	unsigned long	hooks;	/* presumably a bitmask indexed by HOOK_* below - TODO confirm */
+#define HOOK_TID	0
+#define HOOK_CONT	1
+#define HOOK_LSI	2
+#define HOOK_RESTART	3
+	unsigned long	tid_ptrs[2];	/* NOTE(review): looks tied to HOOK_TID - confirm */
+	siginfo_t	last_siginfo;	/* NOTE(review): looks tied to HOOK_LSI - confirm */
+};
+
+#ifdef CONFIG_X86_32
+
+#define IN_SYSCALL(regs)	((long)(regs)->orig_eax >= 0)
+#define IN_ERROR(regs)		((long)(regs)->eax < 0)
+#define SYSCALL_ERRNO(regs)	(-(long)((regs)->eax))
+#define SYSCALL_RETVAL(regs)	((regs)->eax)
+#define SYSCALL_NR(regs)	((regs)->orig_eax)
+
+#define SYSCALL_SETRET(regs,val)	do { (regs)->eax = (val); } while (0)
+
+#define SYSCALL_RESTART2(regs,new)	do { (regs)->eax = (new); \
+					     (regs)->eip -= 2; } while (0) 
+
+#define syscall_is(tsk,regs,name)	(SYSCALL_NR(regs) == __NR_##name)
+
+/* In new kernels task_pt_regs() is defined to something inappropriate */
+#undef task_pt_regs
+#define task_pt_regs(t) ((struct pt_regs *)((t)->thread.esp0) - 1)
+
+#elif defined(CONFIG_X86_64)
+
+#define IN_SYSCALL(regs)	((long)(regs)->orig_rax >= 0)
+#define IN_ERROR(regs)		((long)(regs)->rax < 0)
+#define SYSCALL_ERRNO(regs)	(-(long)((regs)->rax))
+#define SYSCALL_RETVAL(regs)	((regs)->rax)
+#define SYSCALL_NR(regs)	((regs)->orig_rax)
+
+#define SYSCALL_SETRET(regs,val)	do { (regs)->rax = (val); } while (0)
+
+#define SYSCALL_RESTART2(regs,new)	do { (regs)->rax = (new); \
+					     (regs)->rip -= 2; } while (0) 
+/* Syscall numbers matched by syscall_is() when the task has _TIF_IA32 set. */
+#define __NR32_restart_syscall	0
+#define __NR32_rt_sigtimedwait	177
+#define __NR32_pause		29
+#define __NR32_futex		240
+
+#define syscall_is(tsk,regs,name) ((!((tsk)->thread_info->flags&_TIF_IA32) && \
+				    SYSCALL_NR(regs) == __NR_##name) || \
+				   (((tsk)->thread_info->flags&_TIF_IA32) && \
+				    SYSCALL_NR(regs) == __NR32_##name))
+
+#elif defined (CONFIG_IA64)
+
+#define IN_SYSCALL(regs)	((long)(regs)->cr_ifs >= 0)
+#define IN_ERROR(regs)		((long)(regs)->r10 == -1)
+#define SYSCALL_ERRNO(regs)	((regs)->r10 == -1 ? (long)((regs)->r8) : 0)
+#define SYSCALL_RETVAL(regs)	((regs)->r8)
+#define SYSCALL_NR(regs)	((regs)->cr_ifs >= 0 ? (regs)->r15 : -1)
+
+#define SYSCALL_SETRET(regs,val)	do { (regs)->r8 = (val); } while (0)
+
+#define SYSCALL_RESTART2(regs,new)	do { (regs)->r15 = (new); \
+					     (regs)->r10 = 0; \
+					     ia64_decrement_ip(regs); } while (0) 
+
+#define syscall_is(tsk,regs,name)	(SYSCALL_NR(regs) == __NR_##name)
+
+#else
+
+#error This arch is not supported
+
+#endif
+/* Restart with the same syscall number that is currently recorded in regs. */
+#define SYSCALL_RESTART(regs) SYSCALL_RESTART2(regs, SYSCALL_NR(regs))
+
+
+/* Rebuild a kernel siginfo_t from its checkpoint image representation. */
+static void decode_siginfo(siginfo_t *info, struct cpt_siginfo_image *si)
+{
+	memset(info, 0, sizeof(*info));
+	info->si_signo = si->cpt_signo;
+	info->si_errno = si->cpt_errno;
+	info->si_code = si->cpt_code;
+
+	/* The class bits of the code select which fields get filled in. */
+	switch(si->cpt_code & __SI_MASK) {
+	case __SI_TIMER:
+		info->si_tid = si->cpt_pid;
+		info->si_overrun = si->cpt_uid;
+		info->_sifields._timer._sigval.sival_ptr = cpt_ptr_import(si->cpt_sigval);
+		info->si_sys_private = si->cpt_utime;
+		break;
+	case __SI_POLL:
+		info->si_band = si->cpt_pid;
+		info->si_fd = si->cpt_uid;
+		break;
+	case __SI_FAULT:
+		info->si_addr = cpt_ptr_import(si->cpt_sigval);
+#ifdef __ARCH_SI_TRAPNO
+		info->si_trapno = si->cpt_pid;
+#endif
+		break;
+	case __SI_CHLD:
+		info->si_pid = si->cpt_pid;
+		info->si_uid = si->cpt_uid;
+		info->si_status = si->cpt_sigval;
+		info->si_stime = si->cpt_stime;
+		info->si_utime = si->cpt_utime;
+		break;
+	default:	/* __SI_KILL, __SI_RT, __SI_MESGQ and anything else */
+		info->si_pid = si->cpt_pid;
+		info->si_uid = si->cpt_uid;
+		info->si_ptr = cpt_ptr_import(si->cpt_sigval);
+		break;
+	}
+}
+
+/* Append every CPT_OBJ_SIGINFO record found in [start, end) to queue. */
+static int restore_sigqueue(task_t *tsk,
+			    struct sigpending *queue, unsigned long start,
+			    unsigned long end)
+{
+	struct cpt_siginfo_image *si;
+	struct user_struct *up;
+	struct sigqueue *q;
+
+	for (; start < end; start += si->cpt_next) {
+		si = (struct cpt_siginfo_image *)start;
+		if (si->cpt_object != CPT_OBJ_SIGINFO)
+			continue;	/* step over records of other types */
+		up = alloc_uid(si->cpt_user);
+		if (up == NULL)
+			return -ENOMEM;
+		q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC);
+		if (q == NULL)
+			goto err_uid;
+		if (ub_siginfo_charge(q, get_exec_ub()))
+			goto err_free;
+
+		/* Preallocated (posix timer) entries become private ones. */
+		INIT_LIST_HEAD(&q->list);
+		q->flags = 0;
+		q->user = up;
+		atomic_inc(&q->user->sigpending);
+		decode_siginfo(&q->info, si);
+		list_add_tail(&q->list, &queue->list);
+	}
+	return 0;
+
+err_free:
+	kmem_cache_free(sigqueue_cachep, q);
+err_uid:
+	free_uid(up);
+	return -ENOMEM;
+}
+
+/* Set pgrp, session and tty_old_pgrp of every task object from its image. */
+int rst_process_linkage(cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+
+		if (tsk == NULL) {
+			eprintk_ctx("task %u(%s) is missing\n", ti->cpt_pid, ti->cpt_comm);
+			return -EINVAL;
+		}
+		if (virt_pgid(tsk) != ti->cpt_pgrp) {
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_pgrp)) < 0) {	/* image value is a virtual pid */
+				eprintk_ctx("illegal PGRP " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			write_lock_irq(&tasklist_lock);
+			detach_pid(tsk, PIDTYPE_PGID);
+			tsk->signal->pgrp = pid;
+			set_virt_pgid(tsk, ti->cpt_pgrp);
+			if (thread_group_leader(tsk))	/* only group leaders are re-attached */
+				attach_pid(tsk, PIDTYPE_PGID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+		if (virt_sid(tsk) != ti->cpt_session) {
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_session)) < 0) {
+				eprintk_ctx("illegal SID " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			write_lock_irq(&tasklist_lock);
+			detach_pid(tsk, PIDTYPE_SID);
+			tsk->signal->session = pid;
+			set_virt_sid(tsk, ti->cpt_session);
+			if (thread_group_leader(tsk))
+				attach_pid(tsk, PIDTYPE_SID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+		if (ti->cpt_old_pgrp > 0 && tsk->signal->tty_old_pgrp == 0) {	/* set only if still unset */
+			int pid;
+
+			if ((pid = vpid_to_pid(ti->cpt_old_pgrp)) < 0) {
+				eprintk_ctx("illegal OLD_PGRP " CPT_FID "\n", CPT_TID(tsk));
+				return -EINVAL;
+			}
+
+			tsk->signal->tty_old_pgrp = pid;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Rebuild current->signal from the CPT_OBJ_SIGNAL_STRUCT image at ti->cpt_signal:
+ * pgrp/session/tty linkage, group-exit state and the shared pending queue.
+ * *exiting is set to cpt_group_exit once the image has been applied that far.
+ * Returns 0 or a negative errno; the buffer taken with cpt_get_buf() is
+ * released on every path through the single "out" exit.
+ */
+static int
+restore_one_signal_struct(struct cpt_task_image *ti, int *exiting, cpt_context_t *ctx)
+{
+	int err;
+	struct cpt_signal_image *si = cpt_get_buf(ctx);
+
+	current->signal->tty = NULL;
+
+	err = rst_get_object(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, si, ctx);
+	if (err)
+		goto out;
+
+	if (virt_pgid(current) != si->cpt_pgrp) {
+		int pid = 0;
+
+		if (si->cpt_pgrp_type == CPT_PGRP_ORPHAN) {
+			if (!is_virtual_pid(si->cpt_pgrp)) {
+				eprintk_ctx("external process group " CPT_FID "\n", CPT_TID(current));
+				err = -EINVAL;
+				goto out;
+			}
+			pid = alloc_pidmap();
+			if (pid < 0) {
+				err = -EINVAL;
+				goto out;
+			}
+			if ((err = alloc_vpid(pid, si->cpt_pgrp)) < 0) {
+				free_pidmap(pid);
+				pid = 0;
+				if (err != -EEXIST)	/* -EEXIST: fall back to vpid_to_pid() below */
+					goto out;
+			}
+		}
+		if (pid ||
+		    (pid = vpid_to_pid(si->cpt_pgrp)) > 0) {
+			write_lock_irq(&tasklist_lock);
+			detach_pid(current, PIDTYPE_PGID);
+			current->signal->pgrp = pid;
+			set_virt_pgid(current, si->cpt_pgrp);
+			if (thread_group_leader(current))
+				attach_pid(current, PIDTYPE_PGID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+	}
+
+	current->signal->tty_old_pgrp = 0;
+	if ((int)si->cpt_old_pgrp > 0) {
+		if (si->cpt_old_pgrp_type == CPT_PGRP_STRAY) {
+			current->signal->tty_old_pgrp = alloc_pidmap();
+			if (current->signal->tty_old_pgrp < 0) {
+				eprintk_ctx("failed to allocate stray tty_old_pgrp\n");
+				err = -EINVAL;
+				goto out;
+			}
+			free_pidmap(current->signal->tty_old_pgrp);
+		} else {
+			current->signal->tty_old_pgrp = vpid_to_pid(si->cpt_old_pgrp);
+			if (current->signal->tty_old_pgrp < 0) {
+				dprintk_ctx("forward old tty PGID\n");
+				current->signal->tty_old_pgrp = 0;
+			}
+		}
+	}
+
+	if (virt_sid(current) != si->cpt_session) {
+		int pid = 0;
+
+		if (si->cpt_session_type == CPT_PGRP_ORPHAN) {
+			if (!is_virtual_pid(si->cpt_session)) {
+				eprintk_ctx("external process session " CPT_FID "\n", CPT_TID(current));
+				err = -EINVAL;
+				goto out;
+			}
+			pid = alloc_pidmap();
+			if (pid < 0) {
+				err = -EINVAL;
+				goto out;
+			}
+			if ((err = alloc_vpid(pid, si->cpt_session)) < 0) {
+				free_pidmap(pid);
+				pid = 0;
+				if (err != -EEXIST)	/* -EEXIST: fall back to vpid_to_pid() below */
+					goto out;
+			}
+		}
+		if (pid ||
+		    (pid = vpid_to_pid(si->cpt_session)) > 0) {
+			write_lock_irq(&tasklist_lock);
+			detach_pid(current, PIDTYPE_SID);
+			set_virt_sid(current, si->cpt_session);
+			current->signal->session = pid;
+			if (thread_group_leader(current))
+				attach_pid(current, PIDTYPE_SID, pid);
+			write_unlock_irq(&tasklist_lock);
+		}
+	}
+
+	cpt_sigset_import(&current->signal->shared_pending.signal, si->cpt_sigpending);
+	current->signal->leader = si->cpt_leader;
+	if (si->cpt_ctty != CPT_NULL) {
+		cpt_object_t *obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, si->cpt_ctty, ctx);
+		if (obj) {
+			struct tty_struct *tty = obj->o_obj;
+			if (tty->session == 0 || tty->session == current->signal->session) {
+				tty->session = current->signal->session;
+				current->signal->tty = tty;
+			} else {
+				wprintk_ctx("tty session mismatch\n");
+			}
+		}
+	}
+
+	if (si->cpt_curr_target)
+		current->signal->curr_target = find_task_by_pid_ve(si->cpt_curr_target);
+	current->signal->flags = 0;
+	*exiting = si->cpt_group_exit;
+	current->signal->group_exit_code = si->cpt_group_exit_code;
+	if (si->cpt_group_exit_task) {
+		current->signal->group_exit_task = find_task_by_pid_ve(si->cpt_group_exit_task);
+		if (current->signal->group_exit_task == NULL) {
+			eprintk_ctx("oops, group_exit_task=NULL, pid=%u\n", si->cpt_group_exit_task);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+	current->signal->notify_count = si->cpt_notify_count;
+	current->signal->group_stop_count = si->cpt_group_stop_count;
+
+	if (si->cpt_next > si->cpt_hdrlen) {	/* payload after the header: queued shared signals */
+		char *buf = kmalloc(si->cpt_next - si->cpt_hdrlen, GFP_KERNEL);
+		if (buf == NULL) {
+			err = -ENOMEM;
+			goto out;
+		}
+		err = ctx->pread(buf, si->cpt_next - si->cpt_hdrlen, ctx,
+				 ti->cpt_signal + si->cpt_hdrlen);
+		if (err) {
+			kfree(buf);
+			goto out;
+		}
+		restore_sigqueue(current,
+				 &current->signal->shared_pending, (unsigned long)buf,
+				 (unsigned long)buf + si->cpt_next - si->cpt_hdrlen);
+		kfree(buf);
+	}
+	err = 0;	/* err may still hold a tolerated -EEXIST from alloc_vpid() */
+out:
+	cpt_release_buf(ctx);
+	return err;
+}
+
+int restore_one_sighand_struct(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int err, i;
+	struct cpt_sighand_image si;
+	loff_t pos, endpos;
+	/* Load current's signal actions from the CPT_OBJ_SIGHAND_STRUCT image at ti->cpt_sighand. */
+	err = rst_get_object(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, &si, ctx);
+	if (err)
+		return err;
+	/* Start from all-default actions; the records read below overwrite individual slots. */
+	for (i=0; i<_NSIG; i++) {
+		current->sighand->action[i].sa.sa_handler = SIG_DFL;
+#ifndef CONFIG_IA64
+		current->sighand->action[i].sa.sa_restorer = 0;
+#endif
+		current->sighand->action[i].sa.sa_flags = 0;
+		memset(&current->sighand->action[i].sa.sa_mask, 0, sizeof(sigset_t));
+	}
+	pos = ti->cpt_sighand + si.cpt_hdrlen;
+	endpos = ti->cpt_sighand + si.cpt_next;
+	while (pos < endpos) {
+		struct cpt_sighandler_image shi;
+
+		err = rst_get_object(CPT_OBJ_SIGHANDLER, pos, &shi, ctx);
+		if (err)
+			return err;
+		if ((unsigned int)shi.cpt_signo >= _NSIG || shi.cpt_next == 0)
+			return -EINVAL;	/* corrupt image: slot outside action[] or a record that cannot advance pos */
+		current->sighand->action[shi.cpt_signo].sa.sa_handler = (void*)(unsigned long)shi.cpt_handler;
+#ifndef CONFIG_IA64
+		current->sighand->action[shi.cpt_signo].sa.sa_restorer = (void*)(unsigned long)shi.cpt_restorer;
+#endif
+		current->sighand->action[shi.cpt_signo].sa.sa_flags = shi.cpt_flags;
+		cpt_sigset_import(&current->sighand->action[shi.cpt_signo].sa.sa_mask, shi.cpt_mask);
+		pos += shi.cpt_next;
+	}
+
+	return 0;
+}
+
+
+__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	/* Clone flags for the structs whose image position is already in the object table. */
+	int shared_signal = lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx) != NULL;
+	int shared_sighand = ti->cpt_sighand == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx) != NULL;
+
+	/* An image without a sighand (CPT_NULL) is reported as CLONE_SIGHAND as well. */
+	return (shared_signal ? CLONE_THREAD : 0) |
+	       (shared_sighand ? CLONE_SIGHAND : 0);
+}
+
+/*
+ * Attach current's sighand and signal structs to the object table, restoring
+ * each from the image only when its position has not been registered yet.
+ * Returns 0, -EINVAL (missing or mismatching object), -ENOMEM or a restore error.
+ */
+int
+rst_signal_complete(struct cpt_task_image *ti, int * exiting, cpt_context_t *ctx)
+{
+	cpt_object_t *known;
+	int rc;
+
+	if (ti->cpt_sighand == CPT_NULL || ti->cpt_signal == CPT_NULL)
+		return -EINVAL;
+
+	/* A registered sighand must be the one current uses; otherwise register and restore it. */
+	known = lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx);
+	if (known == NULL) {
+		known = cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, current->sighand, ctx);
+		if (known == NULL)
+			return -ENOMEM;
+		cpt_obj_setpos(known, ti->cpt_sighand, ctx);
+		rc = restore_one_sighand_struct(ti, ctx);
+		if (rc)
+			return rc;
+	} else if (known->o_obj != current->sighand) {
+		return -EINVAL;
+	}
+
+	/* Same scheme for the signal struct. */
+	known = lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx);
+	if (known == NULL) {
+		known = cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, current->signal, ctx);
+		if (known == NULL)
+			return -ENOMEM;
+		cpt_obj_setpos(known, ti->cpt_signal, ctx);
+		return restore_one_signal_struct(ti, exiting, ctx);
+	}
+
+	if (known->o_obj != current->signal)
+		return -EINVAL;
+
+	/* Position already registered: only refresh the virtual pgid/sid of current. */
+	if (current->signal) {
+		set_virt_pgid(current, pid_type_to_vpid(PIDTYPE_PGID, current->signal->pgrp));
+		set_virt_sid(current, pid_type_to_vpid(PIDTYPE_SID, current->signal->session));
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_X86
+/* Map a CPT_SEG_* id stored in the image to a segment selector value; 0 when unknown. */
+static u32 decode_segment(u32 segid)
+{
+	if (segid == CPT_SEG_ZERO)
+		return 0;
+
+	/* TLS descriptors: GDT slot counted from GDT_ENTRY_TLS_MIN, low bits 3 (RPL 3) */
+	if (segid <= CPT_SEG_TLS3)
+		return ((GDT_ENTRY_TLS_MIN + segid-CPT_SEG_TLS1)<<3) + 3;
+
+	/* LDT descriptor, it is just an index to LDT array; low bits 7 (TI bit plus RPL 3) */
+	if (segid >= CPT_SEG_LDT)
+		return ((segid - CPT_SEG_LDT) << 3) | 7;
+	/* Check for one of standard descriptors */
+#ifdef CONFIG_X86_64
+	if (segid == CPT_SEG_USER32_DS)
+		return __USER32_DS;
+	if (segid == CPT_SEG_USER32_CS)
+		return __USER32_CS;
+	if (segid == CPT_SEG_USER64_DS)
+		return __USER_DS;
+	if (segid == CPT_SEG_USER64_CS)
+		return __USER_CS;
+#else
+	if (segid == CPT_SEG_USER32_DS)
+		return __USER_DS;
+	if (segid == CPT_SEG_USER32_CS)
+		return __USER_CS;
+#endif
+	wprintk("Invalid segment reg %u\n", segid);	/* segid is u32, hence %u */
+	return 0;
+}
+#endif
+
+#if defined (CONFIG_IA64)
+/* Move the saved instruction pointer (cr_iip plus the psr.ri slot number) back by one slot. */
+void ia64_decrement_ip (struct pt_regs *regs)
+{
+	struct ia64_psr *psr = ia64_psr(regs);
+	unsigned long w0, slot;
+
+	if (psr->ri != 0) {
+		slot = psr->ri - 1;
+	} else {
+		/* Slot 0: step back one 16-byte bundle and aim at its last slot. */
+		regs->cr_iip -= 16;
+		slot = 2;
+		get_user(w0, (char __user *) regs->cr_iip + 0);
+		/* rfi'ing to slot 2 of an MLX bundle causes an illegal operation fault. */
+		if (((w0 >> 1) & 0xf) == 2)
+			slot = 1;
+	}
+	psr->ri = slot;
+}
+#endif
+
+static void rst_child_tid(unsigned long *child_tids)
+{
+	dprintk("rct: " CPT_FID "\n", CPT_TID(current));
+	current->set_child_tid = (void*)child_tids[1];		/* second saved word */
+	current->clear_child_tid = (void*)child_tids[0];	/* first saved word */
+}
+
+static void rst_last_siginfo(void)
+{
+	int signr;
+	siginfo_t *info = current->last_siginfo;
+	struct pt_regs *regs = task_pt_regs(current);
+	struct k_sigaction *ka;
+	int ptrace_id;
+	/* Resume hook: finish the ptrace stop recorded in current->pn_state. */
+	dprintk("rlsi: " CPT_FID "\n", CPT_TID(current));
+
+	spin_lock_irq(&current->sighand->siglock);
+	current->last_siginfo = NULL;
+	recalc_sigpending();
+
+	ptrace_id = current->pn_state;	/* remember the stop kind before it is cleared */
+	clear_pn_state(current);
+
+	switch (ptrace_id) {
+	case PN_STOP_TF:
+	case PN_STOP_TF_RT:
+		/* frame_*signal */
+		dprintk("SIGTRAP %u/%u(%s) %u/%u %u %ld %lu %lu\n",
+		       virt_pid(current), current->pid, current->comm,
+		       info->si_signo, info->si_code,
+		       current->exit_code, SYSCALL_NR(regs),
+		       current->ptrace, current->ptrace_message);
+		goto out;
+	case PN_STOP_ENTRY:
+	case PN_STOP_LEAVE:
+		/* do_syscall_trace */
+		spin_unlock_irq(&current->sighand->siglock);
+		dprintk("ptrace do_syscall_trace: %d %d\n", ptrace_id, current->exit_code);
+		if (current->exit_code) {	/* a non-zero exit_code is sent to the task itself as a signal */
+			send_sig(current->exit_code, current, 1);
+			current->exit_code = 0;
+		}
+		if (IN_SYSCALL(regs)) {
+			if (ptrace_id == PN_STOP_ENTRY
+#ifdef CONFIG_X86
+			    && SYSCALL_ERRNO(regs) == ENOSYS
+#endif
+			    )
+				SYSCALL_RESTART(regs);
+			else if (IN_ERROR(regs) &&
+				 syscall_is(current, regs, rt_sigtimedwait) &&
+				 (SYSCALL_ERRNO(regs) == EAGAIN ||
+				  SYSCALL_ERRNO(regs) == EINTR))
+				SYSCALL_RESTART(regs);
+		}
+		return;	/* siglock was already dropped at the top of this case */
+	case PN_STOP_FORK:
+		/* fork */
+		SYSCALL_SETRET(regs, current->ptrace_message);
+		dprintk("ptrace fork returns pid %ld\n", SYSCALL_RETVAL(regs));
+		goto out;
+	case PN_STOP_VFORK:
+		/* after vfork */
+		SYSCALL_SETRET(regs, current->ptrace_message);
+		dprintk("ptrace after vfork returns pid %ld\n", SYSCALL_RETVAL(regs));
+		goto out;
+	case PN_STOP_SIGNAL:
+		/* normal case : dequeue signal */
+		break;
+	case PN_STOP_EXIT:
+		dprintk("ptrace exit caught\n");
+		current->ptrace &= ~PT_TRACE_EXIT;
+		spin_unlock_irq(&current->sighand->siglock);
+		module_put(THIS_MODULE);	/* dropped here because this path does not return to the caller */
+		complete_and_exit(NULL, current->ptrace_message);
+		BUG();	/* reached only if complete_and_exit() returned */
+	case PN_STOP_EXEC:
+		eprintk("ptrace after exec caught: must not happen\n");
+		BUG();
+	default:
+		eprintk("ptrace with unknown identity %d\n", ptrace_id);
+		BUG();
+	}
+	/* Only PN_STOP_SIGNAL gets here: decide what happens to the signal in exit_code. */
+	signr = current->exit_code;
+	if (signr == 0) {
+		dprintk("rlsi: canceled signal %d\n", info->si_signo);
+		goto out;
+	}
+	current->exit_code = 0;
+
+	if (signr != info->si_signo) {	/* different signal: rewrite info as a SI_USER signal from the parent */
+		info->si_signo = signr;
+		info->si_errno = 0;
+		info->si_code = SI_USER;
+		info->si_pid = virt_pid(current->parent);
+		info->si_uid = current->parent->uid;
+	}
+
+	/* If the (new) signal is now blocked, requeue it.  */
+	if (sigismember(&current->blocked, signr)) {
+		dprintk("going to requeue signal %d\n", signr);
+		goto out_resend_sig;
+	}
+
+	ka = &current->sighand->action[signr-1];
+	if (ka->sa.sa_handler == SIG_IGN) {
+		dprintk("going to resend signal %d (ignored)\n", signr);
+		goto out;
+	}
+	if (ka->sa.sa_handler != SIG_DFL) {
+		dprintk("going to resend signal %d (not SIG_DFL)\n", signr);
+		goto out_resend_sig;
+	}
+        if (signr == SIGCONT ||
+	    signr == SIGCHLD ||
+	    signr == SIGWINCH ||
+	    signr == SIGURG ||
+	    current->pid == 1)
+		goto out;	/* SIG_DFL for these signals, or pid 1: nothing is resent */
+
+	/* All the rest, which we cannot handle are requeued. */
+	dprintk("going to resend signal %d (sigh)\n", signr);
+out_resend_sig:
+	spin_unlock_irq(&current->sighand->siglock);
+	send_sig_info(signr, info, current);	/* sent only after siglock has been released */
+	return;
+
+out:
+	spin_unlock_irq(&current->sighand->siglock);
+}
+
+static void rst_finish_stop(void)
+{
+	/* Hook for a task that was (presumably) stopped in this call chain:
+	 * do_signal() ->
+	 *   get_signal_to_deliver() ->
+	 *     do_signal_stop() ->
+	 *       finish_stop()
+	 *
+	 * Normally, after SIGCONT, it would dequeue the next signal. If no signal
+	 * is found, do_signal() restarts the syscall unconditionally;
+	 * otherwise the signal handler is pushed onto the user stack.
+	 */
+	/* Here only the saved stop state and exit_code of current are cleared. */
+	dprintk("rfs: " CPT_FID "\n", CPT_TID(current));
+
+	clear_stop_state(current);
+	current->exit_code = 0;
+}
+
+static void rst_restart_sys(void)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+
+	/* This hook is supposed to run when an interrupted
+	 * syscall has to be completed after restore.
+	 */
+	dprintk("rrs: " CPT_FID "\n", CPT_TID(current));
+
+	if (!IN_SYSCALL(regs) || !IN_ERROR(regs))
+		return;	/* nothing to do unless the task is in a syscall that returned an error */
+	/* The "else" before #else/#endif chains into the rt_sigtimedwait/futex checks after #endif. */
+#ifdef __NR_pause
+	if (syscall_is(current,regs,pause)) {
+		if (SYSCALL_ERRNO(regs) == ERESTARTNOHAND) {
+			current->state = TASK_INTERRUPTIBLE;	/* go back to sleep instead of returning */
+			schedule();
+		}
+	} else
+#else
+	/* On this arch pause() is simulated with sigsuspend(). */
+	if (syscall_is(current,regs,rt_sigsuspend)) {
+		if (SYSCALL_ERRNO(regs) == ERESTARTNOHAND) {
+			current->state = TASK_INTERRUPTIBLE;
+			schedule();
+		}
+	} else
+#endif
+	if (syscall_is(current,regs,rt_sigtimedwait)) {
+		if (SYSCALL_ERRNO(regs) == EAGAIN ||
+		    SYSCALL_ERRNO(regs) == EINTR) {
+			SYSCALL_RESTART(regs);
+		}
+	} else if (syscall_is(current,regs,futex)) {
+		if (SYSCALL_ERRNO(regs) == EINTR &&
+		    !signal_pending(current)) {
+			SYSCALL_RESTART(regs);
+		}
+	}
+	/* ERESTART* codes are acted on only with no signal pending and TIF_RESTORE_SIGMASK clear. */
+	if (!signal_pending(current) &&
+	    !test_thread_flag(TIF_RESTORE_SIGMASK)) {
+		if (SYSCALL_ERRNO(regs) == ERESTARTSYS ||
+		    SYSCALL_ERRNO(regs) == ERESTARTNOINTR ||
+		    SYSCALL_ERRNO(regs) == ERESTARTNOHAND) {
+			SYSCALL_RESTART(regs);
+		} else if (SYSCALL_ERRNO(regs) == ERESTART_RESTARTBLOCK) {
+			int new = __NR_restart_syscall;
+#ifdef CONFIG_X86_64
+			if (current->thread_info->flags&_TIF_IA32)	/* 32-bit task: use the 32-bit syscall number */
+				new = __NR32_restart_syscall;
+#endif
+			SYSCALL_RESTART2(regs, new);
+		}
+	}
+}
+
+#ifdef CONFIG_X86_32
+
+/* i386: load the saved context of tsk from a CPT_OBJ_X86_REGS image and point thread.eip
+ * at i386_ret_from_resume; returns -EINVAL when b is not such an object, else 0. */
+static int restore_registers(task_t *tsk, struct pt_regs *regs,
+			     struct cpt_task_image *ti, struct cpt_x86_regs *b,
+			     struct resume_info **rip)
+{
+	extern char i386_ret_from_resume;
+	int n;
+
+	if (b->cpt_object != CPT_OBJ_X86_REGS)
+		return -EINVAL;
+
+	/* esp0 is just above pt_regs; esp starts HOOK_RESERVE bytes below pt_regs. */
+	tsk->thread.esp0 = (unsigned long) (regs+1);
+	tsk->thread.esp = (unsigned long) regs - HOOK_RESERVE;
+	tsk->thread.eip = (unsigned long) &i386_ret_from_resume;
+
+	tsk->thread.fs = decode_segment(b->cpt_fs);
+	tsk->thread.gs = decode_segment(b->cpt_gs);
+	for (n = 0; n < 8; n++)
+		tsk->thread.debugreg[n] = b->cpt_debugreg[n];
+
+	/* Relies on the image fields from cpt_ebx onwards matching struct pt_regs. */
+	memcpy(regs, &b->cpt_ebx, sizeof(struct pt_regs));
+
+	/* Segment values in the image are ids for decode_segment(), so they are redone here. */
+	regs->xcs = decode_segment(b->cpt_xcs);
+	regs->xss = decode_segment(b->cpt_xss);
+	regs->xds = decode_segment(b->cpt_xds);
+	regs->xes = decode_segment(b->cpt_xes);
+
+	/* The reserved area is zeroed and handed back to the caller through *rip. */
+	memset((void*)tsk->thread.esp, 0, HOOK_RESERVE);
+	*rip = (void*)tsk->thread.esp;
+
+	return 0;
+}
+
+#elif defined(CONFIG_X86_64)
+
+static void xlate_ptregs_32_to_64(struct pt_regs *d, struct cpt_x86_regs *s)
+{
+	memset(d, 0, sizeof(struct pt_regs));	/* fields not assigned below stay zero */
+	d->orig_rax = (s32)s->cpt_orig_eax;	/* (s32) cast: value is sign-extended */
+	d->rax = (s32)s->cpt_eax;		/* likewise sign-extended */
+	d->rbx = s->cpt_ebx;
+	d->rcx = s->cpt_ecx;
+	d->rdx = s->cpt_edx;
+	d->rsi = s->cpt_esi;
+	d->rdi = s->cpt_edi;
+	d->rbp = s->cpt_ebp;
+	d->rsp = s->cpt_esp;
+	d->rip = s->cpt_eip;
+	d->eflags = s->cpt_eflags;
+	d->cs = s->cpt_xcs;
+	d->ss = s->cpt_xss;
+}
+
+static int restore_registers(task_t *tsk, struct pt_regs *regs,
+			     struct cpt_task_image *ti, struct cpt_obj_bits *hdr,
+			     struct resume_info **rip)
+{
+	/* x86_64: accepts a native CPT_OBJ_X86_64_REGS image or a 32-bit CPT_OBJ_X86_REGS one. */
+	if (hdr->cpt_object != CPT_OBJ_X86_64_REGS &&
+	    hdr->cpt_object != CPT_OBJ_X86_REGS)
+		return -EINVAL;
+
+	tsk->thread.rsp0 = (unsigned long) (regs+1);
+
+	if (hdr->cpt_object == CPT_OBJ_X86_64_REGS) {
+		struct cpt_x86_64_regs *b = (void*)hdr;
+
+		tsk->thread.fs = b->cpt_fsbase;
+		tsk->thread.gs = b->cpt_gsbase;
+		tsk->thread.fsindex = decode_segment(b->cpt_fsindex);
+		tsk->thread.gsindex = decode_segment(b->cpt_gsindex);
+		tsk->thread.ds = decode_segment(b->cpt_ds);
+		tsk->thread.es = decode_segment(b->cpt_es);
+		tsk->thread.debugreg0 = b->cpt_debugreg[0];
+		tsk->thread.debugreg1 = b->cpt_debugreg[1];
+		tsk->thread.debugreg2 = b->cpt_debugreg[2];
+		tsk->thread.debugreg3 = b->cpt_debugreg[3];
+		tsk->thread.debugreg6 = b->cpt_debugreg[6];
+		tsk->thread.debugreg7 = b->cpt_debugreg[7];
+
+		memcpy(regs, &b->cpt_r15, sizeof(struct pt_regs));
+
+		tsk->thread.userrsp = regs->rsp;
+		regs->cs = decode_segment(b->cpt_cs);
+		regs->ss = decode_segment(b->cpt_ss);
+	} else {
+		struct cpt_x86_regs *b = (void*)hdr;
+
+		tsk->thread.fs = 0;
+		tsk->thread.gs = 0;
+		tsk->thread.fsindex = decode_segment(b->cpt_fs);
+		tsk->thread.gsindex = decode_segment(b->cpt_gs);
+		tsk->thread.debugreg0 = b->cpt_debugreg[0];
+		tsk->thread.debugreg1 = b->cpt_debugreg[1];
+		tsk->thread.debugreg2 = b->cpt_debugreg[2];
+		tsk->thread.debugreg3 = b->cpt_debugreg[3];
+		tsk->thread.debugreg6 = b->cpt_debugreg[6];
+		tsk->thread.debugreg7 = b->cpt_debugreg[7];
+
+		xlate_ptregs_32_to_64(regs, b);
+
+		tsk->thread.userrsp = regs->rsp;
+		regs->cs = decode_segment(b->cpt_xcs);
+		regs->ss = decode_segment(b->cpt_xss);
+		tsk->thread.ds = decode_segment(b->cpt_xds);
+		tsk->thread.es = decode_segment(b->cpt_xes);
+	}
+
+	/* thread.rsp starts HOOK_RESERVE below pt_regs; that gap is zeroed and returned via *rip. */
+	tsk->thread.rsp = (unsigned long) regs - HOOK_RESERVE;
+	memset((void*)tsk->thread.rsp, 0, HOOK_RESERVE);
+	*rip = (void*)tsk->thread.rsp;
+	return 0;
+}
+
+#elif defined(CONFIG_IA64)
+
+#define MASK(nbits)	((1UL << (nbits)) - 1)	/* mask with NBITS bits set */
+/* PUT_BITS: rotate nat bits first..last left so bit "first" lands at ia64_unat_pos(&pt->r<first>); uses the caller's "pt". */
+#define PUT_BITS(first, last, nat)					\
+	({								\
+		unsigned long bit = ia64_unat_pos(&pt->r##first);	\
+		unsigned long nbits = (last - first + 1);		\
+		unsigned long mask = MASK(nbits) << first;		\
+		long dist;						\
+		if (bit < first)					\
+			dist = 64 + bit - first;			\
+		else							\
+			dist = bit - first;				\
+		ia64_rotl(nat & mask, dist);				\
+	})
+/* Build a UNAT-style word for the registers kept in struct pt_regs (r1-r3, r8-r31) from nat. */
+unsigned long
+ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat)
+{
+	unsigned long scratch_unat;
+
+	/*
+	 * Registers that are stored consecutively in struct pt_regs
+	 * can be handled in parallel.  If the register order in
+	 * struct_pt_regs changes, this code MUST be updated.
+	 */
+	scratch_unat  = PUT_BITS( 1,  1, nat);
+	scratch_unat |= PUT_BITS( 2,  3, nat);
+	scratch_unat |= PUT_BITS(12, 13, nat);
+	scratch_unat |= PUT_BITS(14, 14, nat);
+	scratch_unat |= PUT_BITS(15, 15, nat);
+	scratch_unat |= PUT_BITS( 8, 11, nat);
+	scratch_unat |= PUT_BITS(16, 31, nat);
+
+	return scratch_unat;
+
+}
+/* Same for r4-r7; "pt" is a struct switch_stack here so PUT_BITS resolves &pt->r4 in it. */
+static unsigned long
+ia64_put_saved_nat_bits (struct switch_stack *pt, unsigned long nat)
+{
+	unsigned long scratch_unat;
+
+	scratch_unat  = PUT_BITS( 4,  7, nat);
+
+	return scratch_unat;
+
+}
+
+#undef PUT_BITS
+
+
+static int restore_registers(struct task_struct *tsk, struct pt_regs *pt,
+			     struct cpt_task_image *ti,
+			     struct cpt_ia64_regs *r,
+			     struct resume_info **rip)
+{
+	extern char ia64_ret_from_resume;
+	struct switch_stack *sw;
+	struct resume_info *ri;
+	struct ia64_psr *psr = ia64_psr(pt);
+	void *krbs = (void *)tsk + IA64_RBS_OFFSET;
+	unsigned long reg;
+	/* ia64: rebuild pt_regs, switch_stack and the kernel RBS of tsk from a CPT_OBJ_IA64_REGS image. */
+	if (r->cpt_object != CPT_OBJ_IA64_REGS)
+		return -EINVAL;
+
+	if (r->num_regs > 96) {
+		eprintk(CPT_FID " too much RSE regs %lu\n",
+			CPT_TID(tsk), r->num_regs);
+		return -EINVAL;
+	}
+	/* Layout below pt_regs: resume_info (HOOK_RESERVE bytes), then one struct switch_stack. */
+	*rip = ri = ((void*)pt) - HOOK_RESERVE;
+	sw = ((struct switch_stack *) ri) - 1;
+
+	memmove(sw, (void*)tsk->thread.ksp + 16, sizeof(struct switch_stack));	/* move the switch_stack found at ksp + 16 */
+	memset(ri, 0, HOOK_RESERVE);
+
+	/* gr 1,2-3,8-11,12-13,14,15,16-31 are on pt_regs */
+	memcpy(&pt->r1,  &r->gr[1],  8*(2-1));
+	memcpy(&pt->r2,  &r->gr[2],  8*(4-2));
+	memcpy(&pt->r8,  &r->gr[8],  8*(12-8));
+	memcpy(&pt->r12, &r->gr[12], 8*(14-12));
+	memcpy(&pt->r14, &r->gr[14], 8*(15-14));
+	memcpy(&pt->r15, &r->gr[15], 8*(16-15));
+	memcpy(&pt->r16, &r->gr[16], 8*(32-16));
+
+	pt->b0 = r->br[0];
+	pt->b6 = r->br[6];
+	pt->b7 = r->br[7];
+
+	pt->ar_bspstore	= r->ar_bspstore;
+	pt->ar_unat	= r->ar_unat;
+	pt->ar_pfs	= r->ar_pfs;
+	pt->ar_ccv	= r->ar_ccv;
+	pt->ar_fpsr	= r->ar_fpsr;
+	pt->ar_csd	= r->ar_csd;
+	pt->ar_ssd	= r->ar_ssd;
+	pt->ar_rsc	= r->ar_rsc;
+
+	pt->cr_iip	= r->cr_iip;
+	pt->cr_ipsr	= r->cr_ipsr;
+
+	pt->pr = r->pr;
+
+	pt->cr_ifs = r->cfm;
+
+	/* fpregs 6..9,10..11 are in pt_regs */
+	memcpy(&pt->f6,  &r->fr[2*6],  16*(10-6));
+	memcpy(&pt->f10, &r->fr[2*10], 16*(12-10));
+	/* fpreg 12..15 are on switch stack */
+	memcpy(&sw->f12, &r->fr[2*12], 16*(16-12));
+	/* fpregs 32...127 */
+	tsk->thread.flags |= IA64_THREAD_FPH_VALID;
+	memcpy(tsk->thread.fph, &r->fr[32*2], 16*(128-32));
+	ia64_drop_fpu(tsk);
+	psr->dfh = 1;
+	/* gr 4-7, br 1-5, ar.lc and fpregs 2-5, 16-31 are kept in the switch_stack. */
+	memcpy(&sw->r4, &r->gr[4], 8*(8-4));
+	memcpy(&sw->b1, &r->br[1], 8*(6-1));
+	sw->ar_lc = r->ar_lc;
+
+	memcpy(&sw->f2, &r->fr[2*2], 16*(6-2));
+	memcpy(&sw->f16, &r->fr[2*16], 16*(32-16));
+
+	sw->caller_unat = 0;
+	sw->ar_fpsr = pt->ar_fpsr;
+	sw->ar_unat = 0;
+	if (r->nat[0] & 0xFFFFFF0FUL)	/* bits 0-3 and 8-31: registers held in pt_regs */
+		sw->caller_unat = ia64_put_scratch_nat_bits(pt, r->nat[0]);
+	if (r->nat[0] & 0xF0)		/* bits 4-7: registers held in the switch_stack */
+		sw->ar_unat = ia64_put_saved_nat_bits(sw, r->nat[0]);
+
+	sw->ar_bspstore = (unsigned long)ia64_rse_skip_regs(krbs, r->num_regs);
+	memset(krbs, 0, (void*)sw->ar_bspstore - krbs);	/* clear the kernel RBS up to the new bspstore */
+	sw->ar_rnat = 0;
+	sw->ar_pfs = 0;
+
+	/* This is tricky. When we are in syscall, we have frame
+	 * of output register (sometimes, plus one input reg sometimes).
+	 * It is not so easy to restore such frame, RSE optimizes
+	 * and does not fetch those regs from backstore. So, we restore
+	 * the whole frame as local registers, and then repartition it
+	 * in ia64_ret_from_resume().
+	 */
+	if ((long)pt->cr_ifs >= 0) {
+		unsigned long out = (r->cfm&0x7F) - ((r->cfm>>7)&0x7F);
+		sw->ar_pfs = out | (out<<7);
+	}
+	if (r->ar_ec)
+		sw->ar_pfs |= (r->ar_ec & 0x3F) << 52;
+	/* Copy the stacked registers gr[32..] into the kernel RBS and rebuild their NaT bits. */
+	for (reg = 0; reg < r->num_regs; reg++) {
+		unsigned long *ptr = ia64_rse_skip_regs(krbs, reg);
+		unsigned long *rnatp;
+		unsigned long set_rnat = 0;
+
+		*ptr = r->gr[32+reg];
+
+		if (reg < 32)	/* first 32 stacked regs use the upper half of nat[0], the rest use nat[1] */
+			set_rnat = (r->nat[0] & (1UL<<(reg+32)));
+		else
+			set_rnat = (r->nat[1] & (1UL<<(reg-32)));
+
+		if (set_rnat) {
+			rnatp = ia64_rse_rnat_addr(ptr);
+			if ((unsigned long)rnatp >= sw->ar_bspstore)	/* collection slot lies past bspstore: use sw->ar_rnat */
+				rnatp = &sw->ar_rnat;
+			*rnatp |= (1UL<<ia64_rse_slot_num(ptr));
+		}
+	}
+	
+	sw->b0 = (unsigned long) &ia64_ret_from_resume;
+	tsk->thread.ksp = (unsigned long) sw - 16;
+
+#define PRED_LEAVE_SYSCALL	1 /* TRUE iff leave from syscall */
+#define PRED_KERNEL_STACK	2 /* returning to kernel-stacks? */
+#define PRED_USER_STACK		3 /* returning to user-stacks? */
+#define PRED_SYSCALL		4 /* inside a system call? */
+#define PRED_NON_SYSCALL	5 /* complement of PRED_SYSCALL */
+
+	pt->loadrs = r->loadrs;
+	sw->pr = 0;	/* the three "&=" lines below are no-ops on this zeroed value */
+	sw->pr &= ~(1UL << PRED_LEAVE_SYSCALL);
+	sw->pr &= ~((1UL << PRED_SYSCALL) | (1UL << PRED_NON_SYSCALL));
+	sw->pr &= ~(1UL << PRED_KERNEL_STACK);
+	sw->pr |= (1UL << PRED_USER_STACK);
+	if ((long)pt->cr_ifs < 0) {	/* top bit of cr_ifs set: not in a syscall */
+		sw->pr |= (1UL << PRED_NON_SYSCALL);
+	} else {
+		sw->pr |= ((1UL << PRED_SYSCALL) | (1UL << PRED_LEAVE_SYSCALL));
+	}
+
+	return 0;
+}
+#endif
+
+asmlinkage void rst_resume_work(struct resume_info *ri)
+{
+	if (ri->hooks & (1<<HOOK_TID))		/* each set bit in ri->hooks selects one hook, run in this fixed order */
+		rst_child_tid(ri->tid_ptrs);
+	if (ri->hooks & (1<<HOOK_CONT))
+		rst_finish_stop();
+	if (ri->hooks & (1<<HOOK_LSI))
+		rst_last_siginfo();		/* its PN_STOP_EXIT path drops the module ref itself and does not return */
+	if (ri->hooks & (1<<HOOK_RESTART))
+		rst_restart_sys();
+	module_put(THIS_MODULE);	/* NOTE(review): presumably pairs with a reference taken when the resume was set up - confirm */
+}
+
+int rst_restore_process(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+		struct pt_regs * regs;
+		struct cpt_object_hdr *b;
+		struct cpt_siginfo_image *lsi = NULL;
+		struct group_info *gids, *ogids;
+		struct resume_info *ri = NULL;
+		int i;
+#ifdef CONFIG_USER_RESOURCE
+		int err;
+		struct task_beancounter *tbc;
+		struct user_beancounter *new_bc, *old_bc;
+#endif
+
+		if (tsk == NULL) {
+			eprintk_ctx("oops, task %d/%s is missing\n", ti->cpt_pid, ti->cpt_comm);
+			return -EFAULT;
+		}
+
+		wait_task_inactive(tsk);
+#ifdef CONFIG_USER_RESOURCE
+		tbc = &tsk->task_bc;
+		new_bc = rst_lookup_ubc(ti->cpt_exec_ub, ctx);
+		err = virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_RSTTSK, new_bc);
+		if (err & NOTIFY_FAIL) {
+			put_beancounter(new_bc);
+			return -ECHRNG; 
+		}
+		old_bc = tbc->exec_ub;
+		if ((err & VIRTNOTIFY_CHANGE) && old_bc != new_bc) {
+			dprintk(" *** replacing ub %p by %p for %p (%d %s)\n",
+					old_bc, new_bc, tsk,
+					tsk->pid, tsk->comm);
+			tbc->exec_ub = new_bc;
+			new_bc = old_bc;
+		}
+		put_beancounter(new_bc);
+#endif
+		regs = task_pt_regs(tsk);
+
+		if (!tsk->exit_state) {
+			tsk->lock_depth = -1;
+#ifdef CONFIG_PREEMPT
+			tsk->thread_info->preempt_count--;
+#endif
+		}
+
+		if (tsk->static_prio != ti->cpt_static_prio)
+			set_user_nice(tsk, PRIO_TO_NICE((s32)ti->cpt_static_prio));
+
+		cpt_sigset_import(&tsk->blocked, ti->cpt_sigblocked);
+		cpt_sigset_import(&tsk->real_blocked, ti->cpt_sigrblocked);
+		cpt_sigset_import(&tsk->saved_sigmask, ti->cpt_sigsuspend_blocked);
+		cpt_sigset_import(&tsk->pending.signal, ti->cpt_sigpending);
+
+		tsk->uid = ti->cpt_uid;
+		tsk->euid = ti->cpt_euid;
+		tsk->suid = ti->cpt_suid;
+		tsk->fsuid = ti->cpt_fsuid;
+		tsk->gid = ti->cpt_gid;
+		tsk->egid = ti->cpt_egid;
+		tsk->sgid = ti->cpt_sgid;
+		tsk->fsgid = ti->cpt_fsgid;
+#ifdef CONFIG_IA64
+		SET_UNALIGN_CTL(tsk, ti->cpt_prctl_uac);
+		SET_FPEMU_CTL(tsk, ti->cpt_prctl_fpemu);
+#endif
+		memcpy(&tsk->cap_effective, &ti->cpt_ecap, sizeof(tsk->cap_effective));
+		memcpy(&tsk->cap_inheritable, &ti->cpt_icap, sizeof(tsk->cap_inheritable));
+		memcpy(&tsk->cap_permitted, &ti->cpt_pcap, sizeof(tsk->cap_permitted));
+		tsk->keep_capabilities = (ti->cpt_keepcap != 0);
+		tsk->did_exec = (ti->cpt_did_exec != 0);
+		gids = groups_alloc(ti->cpt_ngids);
+		ogids = tsk->group_info;
+		if (gids) {
+			int i;
+			for (i=0; i<32; i++)
+				gids->small_block[i] = ti->cpt_gids[i];
+			tsk->group_info = gids;
+		}
+		if (ogids)
+			put_group_info(ogids);
+		tsk->utime = ti->cpt_utime;
+		tsk->stime = ti->cpt_stime;
+		if (ctx->image_version == CPT_VERSION_8)
+			tsk->start_time = _ns_to_timespec(ti->cpt_starttime*TICK_NSEC);
+		else
+			cpt_timespec_import(&tsk->start_time, ti->cpt_starttime);
+		_set_normalized_timespec(&tsk->start_time,
+					tsk->start_time.tv_sec -
+					VE_TASK_INFO(tsk)->owner_env->start_timespec.tv_sec,
+					tsk->start_time.tv_nsec -
+					VE_TASK_INFO(tsk)->owner_env->start_timespec.tv_nsec);
+
+		tsk->nvcsw = ti->cpt_nvcsw;
+		tsk->nivcsw = ti->cpt_nivcsw;
+		tsk->min_flt = ti->cpt_min_flt;
+		tsk->maj_flt = ti->cpt_maj_flt;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8)
+		tsk->cutime = ti->cpt_cutime;
+		tsk->cstime = ti->cpt_cstime;
+		tsk->cnvcsw = ti->cpt_cnvcsw;
+		tsk->cnivcsw = ti->cpt_cnivcsw;
+		tsk->cmin_flt = ti->cpt_cmin_flt;
+		tsk->cmaj_flt = ti->cpt_cmaj_flt;
+
+		if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+			__asm__("undefined\n");
+
+		for (i=0; i<RLIM_NLIMITS; i++) {
+			tsk->rlim[i].rlim_cur = ti->cpt_rlim_cur[i];
+			tsk->rlim[i].rlim_max = ti->cpt_rlim_max[i];
+		}
+#else
+		if (thread_group_leader(tsk) && tsk->signal) {
+			tsk->signal->utime = ti->cpt_utime;
+			tsk->signal->stime = ti->cpt_stime;
+			tsk->signal->cutime = ti->cpt_cutime;
+			tsk->signal->cstime = ti->cpt_cstime;
+			tsk->signal->nvcsw = ti->cpt_nvcsw;
+			tsk->signal->nivcsw = ti->cpt_nivcsw;
+			tsk->signal->cnvcsw = ti->cpt_cnvcsw;
+			tsk->signal->cnivcsw = ti->cpt_cnivcsw;
+			tsk->signal->min_flt = ti->cpt_min_flt;
+			tsk->signal->maj_flt = ti->cpt_maj_flt;
+			tsk->signal->cmin_flt = ti->cpt_cmin_flt;
+			tsk->signal->cmaj_flt = ti->cpt_cmaj_flt;
+
+			if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
+				__asm__("undefined\n");
+
+			for (i=0; i<RLIM_NLIMITS; i++) {
+				tsk->signal->rlim[i].rlim_cur = ti->cpt_rlim_cur[i];
+				tsk->signal->rlim[i].rlim_max = ti->cpt_rlim_max[i];
+			}
+		}
+#endif
+
+#ifdef CONFIG_X86
+		for (i=0; i<3; i++) {
+			if (i >= GDT_ENTRY_TLS_ENTRIES) {
+				eprintk_ctx("too many tls descs\n");
+			} else {
+#ifndef CONFIG_X86_64
+				tsk->thread.tls_array[i].a = ti->cpt_tls[i]&0xFFFFFFFF;
+				tsk->thread.tls_array[i].b = ti->cpt_tls[i]>>32;
+#else
+				tsk->thread.tls_array[i] = ti->cpt_tls[i];
+#endif
+			}
+		}
+#endif
+
+		clear_stopped_child_used_math(tsk);
+
+		b = (void *)(ti+1);
+		while ((void*)b < ((void*)ti) + ti->cpt_next) {
+			/* Siginfo objects are at the end of obj array */
+			if (b->cpt_object == CPT_OBJ_SIGINFO) {
+				struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env);
+				restore_sigqueue(tsk, &tsk->pending, (unsigned long)b, (unsigned long)ti + ti->cpt_next);
+				set_exec_env(env);
+				break;
+			}
+
+			switch (b->cpt_object) {
+#ifdef CONFIG_X86
+			case CPT_OBJ_BITS:
+				if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE &&
+				    cpu_has_fxsr) {
+					memcpy(&tsk->thread.i387,
+					       (void*)b + b->cpt_hdrlen,
+					       sizeof(struct i387_fxsave_struct));
+					if (ti->cpt_used_math)
+						set_stopped_child_used_math(tsk);
+				}
+#ifndef CONFIG_X86_64
+				else if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE_OLD &&
+					 !cpu_has_fxsr) {
+					memcpy(&tsk->thread.i387,
+					       (void*)b + b->cpt_hdrlen,
+					       sizeof(struct i387_fsave_struct));
+					if (ti->cpt_used_math)
+						set_stopped_child_used_math(tsk);
+				}
+#endif
+				break;
+#endif
+			case CPT_OBJ_LASTSIGINFO:
+				lsi = (void*)b;
+				break;
+			case CPT_OBJ_X86_REGS:
+			case CPT_OBJ_X86_64_REGS:
+			case CPT_OBJ_IA64_REGS:
+				if (restore_registers(tsk, regs, ti, (void*)b, &ri)) {
+					eprintk_ctx("cannot restore registers: image is corrupted\n");
+					return -EINVAL;
+				}
+				break;
+			case CPT_OBJ_SIGALTSTACK: {
+				struct cpt_sigaltstack_image *sas;
+				sas = (struct cpt_sigaltstack_image *)b;
+				tsk->sas_ss_sp = sas->cpt_stack;
+				tsk->sas_ss_size = sas->cpt_stacksize;
+				break;
+			    }
+			}
+			b = ((void*)b) + b->cpt_next;
+		}
+
+		if (ri == NULL && !(ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+			eprintk_ctx("missing register info\n");
+			return -EINVAL;
+		}
+
+		if (ti->cpt_ppid != ti->cpt_rppid) {
+			task_t *parent;
+			struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env);
+			write_lock_irq(&tasklist_lock);
+			parent = find_task_by_pid_ve(ti->cpt_ppid);
+			if (parent && parent != tsk->parent) {
+				list_add(&tsk->ptrace_list, &tsk->parent->ptrace_children);
+				REMOVE_LINKS(tsk);
+				tsk->parent = parent;
+				SET_LINKS(tsk);
+			}
+			write_unlock_irq(&tasklist_lock);
+			set_exec_env(env);
+		}
+
+		tsk->ptrace_message = ti->cpt_ptrace_message;
+		tsk->pn_state = ti->cpt_pn_state;
+		tsk->stopped_state = ti->cpt_stopped_state;
+		tsk->thread_info->flags = ti->cpt_thrflags;
+
+		/* The image was created with kernel < 2.6.16, while
+		 * task hanged in sigsuspend -> do_signal.
+		 *
+		 * FIXME! This needs more brain efforts...
+		 */
+		if (ti->cpt_sigsuspend_state) {
+			tsk->thread_info->flags |= _TIF_RESTORE_SIGMASK;
+		}
+
+#ifdef CONFIG_X86_64
+		tsk->thread_info->flags |= _TIF_FORK | _TIF_RESUME;
+		if (!ti->cpt_64bit)
+			tsk->thread_info->flags |= _TIF_IA32;
+#endif
+
+#ifdef CONFIG_X86_32
+		do {
+			if (regs->orig_eax == __NR__newselect && regs->edi) {
+				struct timeval tv;
+				if (access_process_vm(tsk, regs->edi, &tv,
+						sizeof(tv), 0) != sizeof(tv)) {
+					wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm: edi %ld\n",
+						virt_pid(tsk), tsk->pid, tsk->comm,
+					       regs->edi);
+					break;
+				}
+				dprintk_ctx("task %d/%d(%s): Old timeval in newselect: %ld.%ld\n",
+				       virt_pid(tsk), tsk->pid, tsk->comm,
+				       tv.tv_sec, tv.tv_usec);
+				tv.tv_sec -= ctx->delta_time.tv_sec;
+				if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) {
+					tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000;
+					tv.tv_sec--;
+				} else {
+					tv.tv_usec -= ctx->delta_time.tv_nsec / 1000;
+				}
+				if (tv.tv_sec < 0) {
+					tv.tv_sec = 0;
+					tv.tv_usec = 0;
+				}
+				dprintk_ctx("task %d/%d(%s): New timeval in newselect: %ld.%ld\n",
+					virt_pid(tsk), tsk->pid, tsk->comm,
+				       tv.tv_sec, tv.tv_usec);
+				if (access_process_vm(tsk, regs->edi, &tv,
+						sizeof(tv), 1) != sizeof(tv)) {
+					wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm write: edi %ld\n",
+						virt_pid(tsk), tsk->pid, tsk->comm, regs->edi);
+				}
+
+			} else if (regs->orig_eax == __NR_select && regs->edi) {
+				struct {
+					unsigned long n;
+					fd_set __user *inp, *outp, *exp;
+					struct timeval __user *tvp;
+				} a;
+				struct timeval tv;
+				if (access_process_vm(tsk, regs->ebx, &a,
+						sizeof(a), 0) != sizeof(a)) {
+					wprintk_ctx("task %d: Error 2 in access_process_vm\n", tsk->pid);
+					break;
+				}
+				if (access_process_vm(tsk, (unsigned long)a.tvp,
+						&tv, sizeof(tv), 0) != sizeof(tv)) {
+					wprintk_ctx("task %d: Error 3 in access_process_vm\n", tsk->pid);
+					break;
+				}
+				dprintk_ctx("task %d: Old timeval in select: %ld.%ld\n",
+					tsk->pid, tv.tv_sec, tv.tv_usec);
+				tv.tv_sec -= ctx->delta_time.tv_sec;
+				if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) {
+					tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000;
+					tv.tv_sec--;
+				} else {
+					tv.tv_usec -= ctx->delta_time.tv_nsec / 1000;
+				}
+				if (tv.tv_sec < 0) {
+					tv.tv_sec = 0;
+					tv.tv_usec = 0;
+				}
+				dprintk_ctx("task %d: New timeval in select: %ld.%ld\n",
+					tsk->pid, tv.tv_sec, tv.tv_usec);
+				if (access_process_vm(tsk, (unsigned long)a.tvp,
+						&tv, sizeof(tv), 1) != sizeof(tv)) {
+					wprintk_ctx("task %d: Error 3 in access_process_vm write\n", tsk->pid);
+				}
+			}
+		} while (0);
+#endif
+
+		if (ri && IN_SYSCALL(regs) && IN_ERROR(regs)) {
+			switch (SYSCALL_ERRNO(regs)) {
+			case ERESTARTSYS:
+			case ERESTARTNOINTR:
+			case ERESTARTNOHAND:
+			case ERESTART_RESTARTBLOCK:
+			case EAGAIN:
+			case EINTR:
+				ri->hooks |= (1<<HOOK_RESTART);
+			}
+		}
+
+		if (ri && (lsi || tsk->pn_state)) {
+			/* ... -> ptrace_notify()
+			 * or
+			 * ... -> do_signal() -> get_signal_to_deliver() ->
+			 *   ptrace stop
+			 */
+			tsk->last_siginfo = &ri->last_siginfo;
+			ri->hooks |= (1<<HOOK_LSI);
+			if (lsi)
+				decode_siginfo(tsk->last_siginfo, lsi);
+		}
+
+		tsk->ptrace = ti->cpt_ptrace;
+		tsk->flags = ti->cpt_flags & ~PF_FROZEN;
+		clear_tsk_thread_flag(tsk, TIF_FREEZE);
+		tsk->exit_signal = ti->cpt_exit_signal;
+
+		if (ri && tsk->stopped_state) {
+			dprintk_ctx("finish_stop\n");
+			if (ti->cpt_state != TASK_STOPPED)
+				eprintk_ctx("Hellooo, state is %u\n", (unsigned)ti->cpt_state);
+			ri->hooks |= (1<<HOOK_CONT);
+		}
+
+		if (ri && (ti->cpt_set_tid || ti->cpt_clear_tid)) {
+			ri->hooks |= (1<<HOOK_TID);
+			ri->tid_ptrs[0] = ti->cpt_clear_tid;
+			ri->tid_ptrs[1] = ti->cpt_set_tid;
+			dprintk_ctx("settids\n");
+		}
+
+		if (ri && ri->hooks &&
+		    !(ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+			if (try_module_get(THIS_MODULE))
+				ri->hook = rst_resume_work;
+		}
+
+		if (ti->cpt_state == TASK_TRACED)
+			tsk->state = TASK_TRACED;
+		else if (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD)) {
+			tsk->signal->it_virt_expires = 0;
+			tsk->signal->it_prof_expires = 0;
+			if (tsk->state != EXIT_DEAD)
+				eprintk_ctx("oops, schedule() did not make us dead\n");
+		}
+
+		if (thread_group_leader(tsk) &&
+		    ti->cpt_it_real_value &&
+		    !(ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+			DEFINE_KTIME(val);
+			s64 nsec = ti->cpt_it_real_value;
+
+			if (ctx->image_version < CPT_VERSION_9)
+				nsec *= TICK_NSEC;
+
+			val = ktime_add_ns(val, nsec - ctx->delta_nsec);
+			if (val.tv64 <= 0)
+				val.tv64 = NSEC_PER_USEC;
+			dprintk("rst itimer " CPT_FID " +%Ld %Lu\n", CPT_TID(tsk),
+				(long long)val.tv64,
+				(unsigned long long)ti->cpt_it_real_value);
+
+			spin_lock_irq(&tsk->sighand->siglock);
+			if (hrtimer_try_to_cancel(&tsk->signal->real_timer) >= 0) {
+				/* FIXME. Check!!!! */
+				hrtimer_start(&tsk->signal->real_timer, val, HRTIMER_REL);
+			} else {
+				wprintk_ctx("Timer clash. Impossible?\n");
+			}
+			spin_unlock_irq(&tsk->sighand->siglock);
+
+			dprintk_ctx("itimer " CPT_FID " +%Lu\n", CPT_TID(tsk),
+				    (unsigned long long)val.tv64);
+		}
+
+		module_put(THIS_MODULE);
+	}
+	return 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_socket.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_socket.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_socket.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_socket.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,909 @@
+/*
+ *
+ *  kernel/cpt/rst_socket.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/scm.h>
+#include <net/af_unix.h>
+
+#include <ub/ub_mem.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
+
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+#include "cpt_syscalls.h"
+
+/* Copy the generic socket settings stored in image @si into @sk; always returns 0. */
+static int setup_sock_common(struct sock *sk, struct cpt_sock_image *si,
+			     loff_t pos, struct cpt_context *ctx)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sock != NULL) {
+		sock->state = si->cpt_sstate;
+		sock->flags = si->cpt_ssflags;
+	}
+	sk->sk_reuse = si->cpt_reuse;
+	sk->sk_shutdown = si->cpt_shutdown;
+	sk->sk_userlocks = si->cpt_userlocks;
+	sk->sk_no_check = si->cpt_no_check;
+	sk->sk_protocol = si->cpt_protocol;
+	sk->sk_err = si->cpt_err;
+	sk->sk_err_soft = si->cpt_err_soft;
+	sk->sk_priority = si->cpt_priority;
+	sk->sk_rcvlowat = si->cpt_rcvlowat;
+	sk->sk_rcvbuf = si->cpt_rcvbuf;
+	sk->sk_sndbuf = si->cpt_sndbuf;
+	sk->sk_bound_dev_if = si->cpt_bound_dev_if;
+	sock_reset_flag(sk, SOCK_DBG);
+	if (si->cpt_debug)
+		sock_set_flag(sk, SOCK_DBG);
+	sock_reset_flag(sk, SOCK_RCVTSTAMP);
+	if (si->cpt_rcvtstamp)
+		sock_set_flag(sk, SOCK_RCVTSTAMP);
+	sock_reset_flag(sk, SOCK_LOCALROUTE);
+	if (si->cpt_localroute)
+		sock_set_flag(sk, SOCK_LOCALROUTE);
+	sk->sk_flags = si->cpt_flags;	/* whole flag word, stored after the per-flag updates above */
+	/* A CPT_NULL timeout in the image is restored as MAX_SCHEDULE_TIMEOUT. */
+	sk->sk_rcvtimeo = (si->cpt_rcvtimeo == CPT_NULL) ?
+			  MAX_SCHEDULE_TIMEOUT : si->cpt_rcvtimeo;
+	sk->sk_sndtimeo = (si->cpt_sndtimeo == CPT_NULL) ?
+			  MAX_SCHEDULE_TIMEOUT : si->cpt_sndtimeo;
+	sk->sk_lingertime = (si->cpt_lingertime == CPT_NULL) ?
+			    MAX_SCHEDULE_TIMEOUT : si->cpt_lingertime;
+	sk->sk_peercred.pid = si->cpt_peer_pid;
+	sk->sk_peercred.uid = si->cpt_peer_uid;
+	sk->sk_peercred.gid = si->cpt_peer_gid;
+	cpt_timeval_import(&sk->sk_stamp, si->cpt_stamp);
+	return 0;
+}
+/* Map @sock to a file through a temporary fd; returns the file with an extra reference, or ERR_PTR. */
+static struct file *sock_mapfile(struct socket *sock)
+{
+	struct file *filp;
+	int fd = sock_map_fd(sock);
+
+	if (fd < 0)
+		return ERR_PTR(fd);
+	filp = sock->file;
+	get_file(filp);		/* taken before the fd is closed below */
+	sc_close(fd);
+	return filp;
+}
+
+/* The assumption is that /tmp exists and is writable.
+ * Previous versions assumed that listen() would autobind
+ * the socket. It does not do this for AF_UNIX, for an evident reason:
+ * a socket in the abstract namespace is accessible, unlike a socket
+ * bound to a deleted FS object.
+ */
+
+static int
+select_deleted_name(char * name, cpt_context_t *ctx)
+{
+	struct nameidata nd;
+	int tries_left = 100;
+
+	/* Try random /tmp/SOCK.<hex> names until path_lookup() fails for one of them. */
+	while (tries_left-- > 0) {
+		sprintf(name, "/tmp/SOCK.%08x", (unsigned int)net_random());
+		if (path_lookup(name, 0, &nd))
+			return 0;
+
+		/* The name resolved: drop the lookup result and try another one. */
+		path_release(&nd);
+	}
+
+	/* Every candidate name resolved; give up. */
+	eprintk_ctx("failed to allocate deleted socket inode\n");
+	return -ELOOP;
+}
+/* Bind AF_UNIX socket @sock to the local address saved in @si; returns 0 or a lookup/bind error. */
+static int
+bind_unix_socket(struct socket *sock, struct cpt_sock_image *si,
+		 cpt_context_t *ctx)
+{
+	int err;
+	char *name;
+	struct sockaddr* addr;
+	int addrlen;
+	struct sockaddr_un sun;
+	struct nameidata nd;
+
+	if ((addrlen = si->cpt_laddrlen) <= 2)	/* nothing follows the first two bytes: nothing to bind */
+		return 0;
+
+	nd.dentry = NULL;
+	name = ((char*)si->cpt_laddr) + 2;	/* the name starts two bytes into the saved address */
+	addr = (struct sockaddr *)si->cpt_laddr;
+
+	if (name[0]) {
+		if (path_lookup(name, 0, &nd))
+			nd.dentry = NULL;
+		/* From here on nd.dentry != NULL means that the saved path currently resolves. */
+		if (si->cpt_deleted) {
+			if (nd.dentry == NULL &&
+			    sock->ops->bind(sock, addr, addrlen) == 0) {
+				sc_unlink(name);
+				return 0;
+			}
+			/* Saved path is taken or bind failed: switch to a name from select_deleted_name(). */
+			addr = (struct sockaddr*)&sun;
+			addr->sa_family = AF_UNIX;
+			name = ((char*)addr) + 2;
+			err = select_deleted_name(name, ctx);
+			if (err)
+				goto out;
+			addrlen = 2 + strlen(name);
+		} else if (nd.dentry) {
+			if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) {
+				eprintk_ctx("bind_unix_socket: not a socket dentry\n");
+				err = -EINVAL;
+				goto out;
+			}
+			sc_unlink(name);	/* remove the existing socket node before binding to the same path */
+		}
+	}
+
+	err = sock->ops->bind(sock, addr, addrlen);
+
+	if (!err && name[0]) {
+		if (nd.dentry) {	/* copy owner and mode from the node found by the lookup above */
+			sc_chown(name, nd.dentry->d_inode->i_uid,
+				 nd.dentry->d_inode->i_gid);
+			sc_chmod(name, nd.dentry->d_inode->i_mode);
+		}
+		if (si->cpt_deleted)	/* image marks the socket as deleted: unlink the node just created */
+			sc_unlink(name);
+	}
+
+out:
+	if (nd.dentry)
+		path_release(&nd);
+	return err;
+}
+/* Give a non-listening AF_UNIX socket its address: bind it, or share the address of its parent. */
+static int fixup_unix_address(struct socket *sock, struct cpt_sock_image *si,
+			      struct cpt_context *ctx)
+{
+	struct sock *sk = sock->sk;
+	cpt_object_t *obj;
+	struct sock *parent;
+
+	if (sk->sk_family != AF_UNIX || sk->sk_state == TCP_LISTEN)
+		return 0;
+
+	if (si->cpt_parent == -1)	/* no parent recorded: bind on our own */
+		return bind_unix_socket(sock, si, ctx);
+
+	obj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+	if (!obj || !obj->o_obj)	/* indexed objects may not have a sock attached yet */
+		return 0;
+
+	parent = obj->o_obj;
+	if (unix_sk(parent)->addr) {
+		if (unix_sk(sk)->addr &&
+		    atomic_dec_and_test(&unix_sk(sk)->addr->refcnt))
+			kfree(unix_sk(sk)->addr);
+		atomic_inc(&unix_sk(parent)->addr->refcnt);
+		unix_sk(sk)->addr = unix_sk(parent)->addr;
+	}
+	return 0;
+}
+/* Restore the skb and attribute records that follow socket image @si at @pos; 0 or -errno. */
+static int generic_restore_queues(struct sock *sk, struct cpt_sock_image *si,
+				  loff_t pos, struct cpt_context *ctx)
+{
+	loff_t endpos;
+
+	endpos = pos + si->cpt_next;	/* cpt_next is measured from the object start, header included */
+	pos = pos + si->cpt_hdrlen;
+	while (pos < endpos) {
+		struct sk_buff *skb;
+		__u32 type;
+
+		skb = rst_skb(&pos, NULL, &type, ctx);
+		if (IS_ERR(skb)) {
+			int err;
+
+			if (PTR_ERR(skb) != -EINVAL)
+				return PTR_ERR(skb);
+			err = rst_sock_attr(&pos, sk, ctx);
+			if (err)
+				return err;
+			continue;	/* it was an attribute record; go on with the next one */
+		}
+
+		if (type == CPT_SKB_RQ) {
+			skb_set_owner_r(skb, sk);
+			skb_queue_tail(&sk->sk_receive_queue, skb);
+		} else {
+			wprintk_ctx("strange socket queue type %u\n", type);
+			kfree_skb(skb);
+		}
+	}
+	return 0;
+}
+/* Recreate the non-listening socket described by @si for @obj, plus its socketpair peer if there is one. */
+static int open_socket(cpt_object_t *obj, struct cpt_sock_image *si,
+		       struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct socket *sock2 = NULL;
+	struct file *file;
+	cpt_object_t *fobj;
+	cpt_object_t *pobj = NULL;
+
+	err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol,
+			       &sock);
+	if (err)
+		return err;
+
+	if (si->cpt_socketpair) {
+		err = sock_create_kern(si->cpt_family, si->cpt_type,
+				       si->cpt_protocol, &sock2);
+		if (err)
+			goto err_out;
+
+		err = sock->ops->socketpair(sock, sock2);
+		if (err < 0)
+			goto err_out;
+
+		/* Socketpair with a peer outside our environment.
+		 * So, we create real half-open pipe and do not worry
+		 * about dead end anymore. */
+		if (si->cpt_peer == -1) {
+			sock_release(sock2);
+			sock2 = NULL;
+		}
+	}
+
+	cpt_obj_setobj(obj, sock->sk, ctx);
+	/* If the image records a file for this socket, create it and register it as a file object. */
+	if (si->cpt_file != CPT_NULL) {
+		file = sock_mapfile(sock);
+		err = PTR_ERR(file);
+		if (IS_ERR(file))
+			goto err_out;
+
+		err = -ENOMEM;
+
+		obj->o_parent = file;
+
+		if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL)
+			goto err_out;
+		cpt_obj_setpos(fobj, si->cpt_file, ctx);
+		cpt_obj_setindex(fobj, si->cpt_index, ctx);
+	}
+	/* Attach the socketpair peer to its own, still empty object, with its own file if it has one. */
+	if (sock2) {
+		struct file *file2;
+
+		pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_peer, ctx);
+		if (!pobj) BUG();
+		if (pobj->o_obj) BUG();
+		cpt_obj_setobj(pobj, sock2->sk, ctx);
+
+		if (pobj->o_ppos != CPT_NULL) {
+			file2 = sock_mapfile(sock2);
+			err = PTR_ERR(file2);
+			if (IS_ERR(file2))
+				goto err_out;
+
+			err = -ENOMEM;
+			if ((fobj = cpt_object_add(CPT_OBJ_FILE, file2, ctx)) == NULL)
+				goto err_out;
+			cpt_obj_setpos(fobj, pobj->o_ppos, ctx);
+			cpt_obj_setindex(fobj, si->cpt_peer, ctx);
+
+			pobj->o_parent = file2;
+		}
+	}
+	/* Restore the generic state, then do the per-family bind/connect and queue restore. */
+	setup_sock_common(sock->sk, si, obj->o_pos, ctx);
+	if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6) {
+		int saved_reuse = sock->sk->sk_reuse;
+
+		inet_sk(sock->sk)->freebind = 1;
+		sock->sk->sk_reuse = 2;
+		if (si->cpt_laddrlen) {
+			err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+			if (err) {
+				dprintk_ctx("binding failed: %d, do not worry\n", err);
+			}
+		}
+		sock->sk->sk_reuse = saved_reuse;
+		rst_socket_in(si, obj->o_pos, sock->sk, ctx);
+	} else if (sock->sk->sk_family == AF_NETLINK) {
+		struct sockaddr_nl *nl = (struct sockaddr_nl *)&si->cpt_laddr;
+		if (nl->nl_pid) {
+			err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+			if (err) {
+				eprintk_ctx("AF_NETLINK binding failed: %d\n", err);
+			}
+		}
+		if (si->cpt_raddrlen && nl->nl_pid) {
+			err = sock->ops->connect(sock, (struct sockaddr *)&si->cpt_raddr, si->cpt_raddrlen, O_NONBLOCK);
+			if (err) {
+				eprintk_ctx("oops, AF_NETLINK connect failed: %d\n", err);
+			}
+		}
+		generic_restore_queues(sock->sk, si, obj->o_pos, ctx);
+	} else if (sock->sk->sk_family == PF_PACKET) {
+		struct sockaddr_ll *ll = (struct sockaddr_ll *)&si->cpt_laddr;
+		if (ll->sll_protocol || ll->sll_ifindex) {
+			int alen = si->cpt_laddrlen;
+			if (alen < sizeof(struct sockaddr_ll))
+				alen = sizeof(struct sockaddr_ll);
+			err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, alen);
+			if (err) {
+				eprintk_ctx("AF_PACKET binding failed: %d\n", err);
+			}
+		}
+		generic_restore_queues(sock->sk, si, obj->o_pos, ctx);
+	}
+	fixup_unix_address(sock, si, ctx);
+	/* NOTE(review): rst_get_object() below overwrites *si with the peer's image; later si checks read the peer's fields. */
+	if (sock2) {
+		err = rst_get_object(CPT_OBJ_SOCKET, pobj->o_pos, si, ctx);
+		if (err)
+			return err;	/* NOTE(review): returns without the err_out cleanup - confirm intended */
+		setup_sock_common(sock2->sk, si, pobj->o_pos, ctx);
+		fixup_unix_address(sock2, si, ctx);
+	}
+	/* Inet socket with a parent index: when cpt_attach_accept() returns 0 the sock is detached from @sock. */
+	if ((sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6)
+	    && (int)si->cpt_parent != -1) {
+		cpt_object_t *lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+		if (lobj && cpt_attach_accept(lobj->o_obj, sock->sk, ctx) == 0)
+			sock->sk = NULL;
+	}
+
+	/* AF_INET socket without a file in the image: detach the sock from @sock and orphan it. */
+	if (si->cpt_file == CPT_NULL && sock->sk &&
+	    sock->sk->sk_family == AF_INET) {
+		struct sock *sk = sock->sk;
+
+		if (sk) {
+			sock->sk = NULL;
+
+			local_bh_disable();
+			bh_lock_sock(sk);
+			if (sock_owned_by_user(sk))
+				eprintk_ctx("oops, sock is locked by user\n");
+
+			sock_hold(sk);
+			sock_orphan(sk);
+			ub_inc_orphan_count(sk);
+			bh_unlock_sock(sk);
+			local_bh_enable();
+			sock_put(sk);
+			dprintk_ctx("orphaning socket %p\n", sk);
+		}
+	}
+
+	if (si->cpt_file == CPT_NULL && sock->sk == NULL)	/* no file and no sock left: drop the bare socket */
+		sock_release(sock);
+
+	return 0;
+
+err_out:
+	if (sock2)
+		sock_release(sock2);
+	sock_release(sock);
+	return err;
+}
+/* Recreate a listening socket from image @si: bind, listen, map it to a file and register both objects. */
+static int open_listening_socket(loff_t pos, struct cpt_sock_image *si,
+				 struct cpt_context *ctx)
+{
+	int err;
+	struct socket *sock;
+	struct file *file;
+	cpt_object_t *obj, *fobj;
+
+	err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol,
+			       &sock);
+	if (err) {
+		eprintk_ctx("open_listening_socket: sock_create_kern: %d\n", err);
+		return err;
+	}
+
+	sock->sk->sk_reuse = 2;
+	sock->sk->sk_bound_dev_if = si->cpt_bound_dev_if;
+
+	if (sock->sk->sk_family == AF_UNIX) {
+		err = bind_unix_socket(sock, si, ctx);
+	} else if (si->cpt_laddrlen) {
+		if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6)
+			inet_sk(sock->sk)->freebind = 1;
+
+		err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
+	}
+	/* Covers both bind paths; err is still 0 here when no bind was attempted. */
+	if (err) {
+		eprintk_ctx("open_listening_socket: bind: %d\n", err);
+		goto err_out;
+	}
+
+	err = sock->ops->listen(sock, si->cpt_max_ack_backlog);
+	if (err) {
+		eprintk_ctx("open_listening_socket: listen: %d, %Ld, %d\n", err, pos, si->cpt_deleted);
+		goto err_out;
+	}
+
+	/* Now we may access socket body directly and fixup all the things. */
+
+	file = sock_mapfile(sock);
+	err = PTR_ERR(file);
+	if (IS_ERR(file)) {
+		eprintk_ctx("open_listening_socket: map: %d\n", err);
+		goto err_out;
+	}
+
+	err = -ENOMEM;
+	if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL)
+		goto err_out;
+	if ((obj = cpt_object_add(CPT_OBJ_SOCKET, sock->sk, ctx)) == NULL)
+		goto err_out;
+	cpt_obj_setpos(obj, pos, ctx);
+	cpt_obj_setindex(obj, si->cpt_index, ctx);
+	obj->o_parent = file;
+	cpt_obj_setpos(fobj, si->cpt_file, ctx);
+	cpt_obj_setindex(fobj, si->cpt_index, ctx);
+
+	setup_sock_common(sock->sk, si, pos, ctx);
+
+	if (si->cpt_family == AF_INET || si->cpt_family == AF_INET6)
+		rst_restore_synwait_queue(sock->sk, si, pos, ctx);
+
+	return 0;
+
+err_out:
+	sock_release(sock);	/* NOTE(review): also reached after sock_mapfile() succeeded - confirm this is safe */
+	return err;
+}
+/* Read one CPT_OBJ_SOCK_MCADDR record at *pos_p, step past it and apply it to @sk by address family. */
+static int
+rst_sock_attr_mcfilter(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx)
+{
+	struct cpt_sockmc_image mc;
+	const loff_t start = *pos_p;
+	int rc = rst_get_object(CPT_OBJ_SOCK_MCADDR, start, &mc, ctx);
+
+	if (rc != 0)
+		return rc;
+	*pos_p += mc.cpt_next;	/* consumed even if the family turns out to be unsupported */
+
+	switch (mc.cpt_family) {
+	case AF_INET:
+		return rst_sk_mcfilter_in(sk, &mc, start, ctx);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		return rst_sk_mcfilter_in6(sk, &mc, start, ctx);
+#endif
+	default:
+		return -EAFNOSUPPORT;
+	}
+}
+/* Read one CPT_OBJ_SKFILTER record at *pos_p and install its instructions as sk->sk_filter. */
+static int
+rst_sock_attr_skfilter(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx)
+{
+	int err;
+	struct sk_filter *fp, *old_fp;
+	loff_t pos = *pos_p;
+	struct cpt_obj_bits v;
+
+	err = rst_get_object(CPT_OBJ_SKFILTER, pos, &v, ctx);
+	if (err)
+		return err;
+
+	*pos_p += v.cpt_next;	/* record consumed; rst_sock_attr() checks whether the position moved */
+
+	if (v.cpt_size % sizeof(struct sock_filter))	/* payload must be a whole number of instructions */
+		return -EINVAL;
+
+	fp = sock_kmalloc(sk, v.cpt_size+sizeof(*fp), GFP_KERNEL_UBC);
+	if (fp == NULL)
+		return -ENOMEM;
+	atomic_set(&fp->refcnt, 1);
+	fp->len = v.cpt_size/sizeof(struct sock_filter);
+	/* NOTE(review): the instructions are taken from the image as-is; no validity check is visible here. */
+	err = ctx->pread(fp->insns, v.cpt_size, ctx, pos+v.cpt_hdrlen);
+	if (err) {
+		sk_filter_release(sk, fp);
+		return err;
+	}
+
+	old_fp = sk->sk_filter;
+	sk->sk_filter = fp;
+	if (old_fp)	/* drop the filter that was installed before, if any */
+		sk_filter_release(sk, old_fp);
+	return 0;
+}
+
+/* Restore one attribute record: try a socket filter, then a multicast filter if nothing was consumed. */
+int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx)
+{
+	const loff_t start = *pos_p;
+	int rc = rst_sock_attr_skfilter(pos_p, sk, ctx);
+
+	/* Success, or a failure after the record was already consumed, is final. */
+	if (rc == 0 || *pos_p != start)
+		return rc;
+	return rst_sock_attr_mcfilter(pos_p, sk, ctx);
+}
+/* Rebuild an skb from the CPT_OBJ_SKB record at *pos_p (moved past it once the header is read); skb or ERR_PTR. */
+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx)
+{
+	int err;
+	struct sk_buff *skb;
+	struct cpt_skb_image v;
+	loff_t pos = *pos_p;
+	struct scm_fp_list *fpl = NULL;
+	struct timeval tmptv;
+
+	err = rst_get_object(CPT_OBJ_SKB, pos, &v, ctx);
+	if (err)
+		return ERR_PTR(err);
+	*pos_p = pos + v.cpt_next;
+	if (owner)
+		*owner = v.cpt_owner;
+	if (queue)
+		*queue = v.cpt_queue;
+
+	skb = alloc_skb(v.cpt_len + v.cpt_hspace + v.cpt_tspace, GFP_KERNEL);
+	if (skb == NULL)
+		return ERR_PTR(-ENOMEM);
+	skb_reserve(skb, v.cpt_hspace);
+	skb_put(skb, v.cpt_len);
+	skb->h.raw = skb->head + v.cpt_h;
+	skb->nh.raw = skb->head + v.cpt_nh;
+	skb->mac.raw = skb->head + v.cpt_mac;
+	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(v.cpt_cb));
+	memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
+	skb->mac_len = v.cpt_mac_len;
+	skb->csum = v.cpt_csum;
+	skb->local_df = v.cpt_local_df;
+	skb->pkt_type = v.cpt_pkt_type;
+	skb->ip_summed = v.cpt_ip_summed;
+	skb->priority = v.cpt_priority;
+	skb->protocol = v.cpt_protocol;
+	cpt_timeval_import(&tmptv, v.cpt_stamp);
+	skb_set_timestamp(skb, &tmptv);
+	skb_shinfo(skb)->gso_segs = v.cpt_gso_segs;
+	skb_shinfo(skb)->gso_size = v.cpt_gso_size;
+	if (ctx->image_version == 0) {
+		skb_shinfo(skb)->gso_segs = 1;
+		skb_shinfo(skb)->gso_size = 0;
+	}
+
+	if (v.cpt_next > v.cpt_hdrlen) {
+		pos = pos + v.cpt_hdrlen;
+		while (pos < *pos_p) {
+			union {
+				struct cpt_obj_bits b;
+				struct cpt_fd_image f;
+			} u;
+
+			err = rst_get_object(-1, pos, &u, ctx);
+			if (err) {
+				kfree_skb(skb);
+				return ERR_PTR(err);
+			}
+			if (u.b.cpt_object == CPT_OBJ_BITS) {
+				if (u.b.cpt_size != v.cpt_hspace + skb->len) {
+					eprintk_ctx("invalid skb image %u != %u + %u\n", u.b.cpt_size, v.cpt_hspace, skb->len);
+					kfree_skb(skb);
+					return ERR_PTR(-EINVAL);
+				}
+				err = ctx->pread(skb->head, u.b.cpt_size, ctx, pos+u.b.cpt_hdrlen);
+				if (err) {
+					kfree_skb(skb);
+					return ERR_PTR(err);
+				}
+			} else if (u.f.cpt_object == CPT_OBJ_FILEDESC) {
+				if (!fpl) {
+					fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+					if (!fpl) {
+						kfree_skb(skb);
+						return ERR_PTR(-ENOMEM);
+					}
+					fpl->count = 0;
+					UNIXCB(skb).fp = fpl;
+				}
+				if (fpl->count < ARRAY_SIZE(fpl->fp)) {	/* fp[] has a fixed size: never store past its end */
+					fpl->fp[fpl->count] = rst_file(u.f.cpt_file, -1, ctx);
+					if (!IS_ERR(fpl->fp[fpl->count]))
+						fpl->count++;
+				} else {
+					wprintk_ctx("too many passed files in skb image\n");
+				}
+			}
+			pos += u.b.cpt_next;
+		}
+	}
+
+	return skb;
+}
+/* Requeue the skbs recorded after AF_UNIX socket image @si at @pos onto @sk's receive queue; 0 or -errno. */
+static int restore_unix_rqueue(struct sock *sk, struct cpt_sock_image *si,
+			       loff_t pos, struct cpt_context *ctx)
+{
+	loff_t endpos;
+
+	endpos = pos + si->cpt_next;	/* cpt_next is measured from the object start, header included */
+	pos = pos + si->cpt_hdrlen;
+	while (pos < endpos) {
+		struct sk_buff *skb;
+		struct sock *owner_sk;
+		__u32 owner;
+
+		skb = rst_skb(&pos, &owner, NULL, ctx);
+		if (IS_ERR(skb)) {
+			int err;
+
+			if (PTR_ERR(skb) != -EINVAL)
+				return PTR_ERR(skb);
+			err = rst_sock_attr(&pos, sk, ctx);
+			if (err)
+				return err;
+			continue;	/* it was an attribute record; go on with the next one */
+		}
+
+		owner_sk = unix_peer(sk);	/* default owner; replaced below when the skb records one */
+		if (owner != -1) {
+			cpt_object_t *pobj;
+			pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, owner, ctx);
+			if (pobj == NULL) {
+				eprintk_ctx("orphan af_unix skb?\n");
+				kfree_skb(skb);
+				continue;
+			}
+			owner_sk = pobj->o_obj;
+		}
+		if (owner_sk == NULL) {
+			dprintk_ctx("orphan af_unix skb 2?\n");
+			kfree_skb(skb);
+			continue;
+		}
+		skb_set_owner_w(skb, owner_sk);
+		if (UNIXCB(skb).fp)
+			skb->destructor = unix_destruct_fds;
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		if (sk->sk_state == TCP_LISTEN) {
+			struct socket *sock = skb->sk->sk_socket;
+			if (sock == NULL) BUG();
+			if (sock->file) BUG();
+			skb->sk->sk_socket = NULL;
+			skb->sk->sk_sleep = NULL;
+			sock->sk = NULL;
+			sock_release(sock);
+		}
+	}
+	return 0;
+}
+
+
+/* Restore sockets from the CPT_SECT_SOCKET section; all of them are created before files are opened. */
+int rst_sockets(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_SOCKET];
+	loff_t endsec, next;
+	cpt_object_t *obj;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err) {
+		eprintk_ctx("rst_sockets: ctx->pread: %d\n", err);
+		return err;
+	}
+	if (h.cpt_section != CPT_SECT_SOCKET || h.cpt_hdrlen < sizeof(h)) {
+		eprintk_ctx("rst_sockets: hdr err\n");
+		return -EINVAL;
+	}
+
+	/* The first pass: we create socket index and open listening sockets. */
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		struct cpt_sock_image *sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: rst_get_object: %d\n", err);
+			cpt_release_buf(ctx);
+			return err;
+		}
+		next = sbuf->cpt_next;	/* remember it: sbuf must not be read once it is released */
+		if (sbuf->cpt_state == TCP_LISTEN) {
+			err = open_listening_socket(sec, sbuf, ctx);
+			cpt_release_buf(ctx);
+			if (err) {
+				eprintk_ctx("rst_sockets: open_listening_socket: %d\n", err);
+				return err;
+			}
+		} else {
+			obj = alloc_cpt_object(GFP_KERNEL, ctx);
+			if (obj == NULL) {
+				cpt_release_buf(ctx);
+				return -ENOMEM;
+			}
+			cpt_obj_setindex(obj, sbuf->cpt_index, ctx);
+			cpt_obj_setpos(obj, sec, ctx);
+			obj->o_ppos  = sbuf->cpt_file;
+			cpt_release_buf(ctx);	/* released only after the last read of sbuf */
+			intern_cpt_object(CPT_OBJ_SOCKET, obj, ctx);
+		}
+		sec += next;
+	}
+
+	/* Pass 2: really restore sockets */
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct cpt_sock_image *sbuf;
+		if (obj->o_obj != NULL)	/* already has a sock, e.g. a listening socket from pass 1 */
+			continue;
+		sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: rst_get_object: %d\n", err);
+			cpt_release_buf(ctx);
+			return err;
+		}
+		if (sbuf->cpt_state == TCP_LISTEN) BUG();
+		err = open_socket(obj, sbuf, ctx);
+		cpt_release_buf(ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: open_socket: %d\n", err);
+			return err;
+		}
+	}
+	return 0;
+}
+/* Restore the sockets of the CPT_SECT_ORPHANS section through open_socket(), one record at a time. */
+int rst_orphans(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_ORPHANS];
+	loff_t endsec, next;
+	cpt_object_t *obj;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_ORPHANS || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		struct cpt_sock_image *sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		next = sbuf->cpt_next;	/* taken now: open_socket() may rewrite sbuf and it is released below */
+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
+		if (obj == NULL) {
+			cpt_release_buf(ctx);
+			return -ENOMEM;
+		}
+		obj->o_pos = sec;
+		obj->o_ppos  = sbuf->cpt_file;
+		err = open_socket(obj, sbuf, ctx);
+		dprintk_ctx("Restoring orphan: %d\n", err);
+		free_cpt_object(obj, ctx);
+		cpt_release_buf(ctx);
+		if (err)
+			return err;
+		sec += next;
+	}
+
+	return 0;
+}
+
+
+/* Pass 3: I understand, this is not funny already :-),
+ * but we have to do another pass to establish links between
+ * not-paired AF_UNIX SOCK_DGRAM sockets and to restore AF_UNIX
+ * skb queues with proper skb->sk links.
+ *
+ * This could be made at the end of rst_sockets(), but we defer
+ * restoring af_unix queues up to the end of restoring files to
+ * make restoring passed FDs cleaner.
+ */
+
+int rst_sockets_complete(struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct cpt_sock_image *sbuf;
+		struct sock *sk = obj->o_obj;
+		struct sock *peer;
+
+		if (!sk) BUG();	/* every socket object is expected to carry a sock by now */
+
+		if (sk->sk_family != AF_UNIX)
+			continue;
+
+		sbuf = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+		if (err) {
+			cpt_release_buf(ctx);
+			return err;
+		}
+		/* Records follow the socket header only when cpt_next exceeds cpt_hdrlen. */
+		if (sbuf->cpt_next > sbuf->cpt_hdrlen)
+			restore_unix_rqueue(sk, sbuf, obj->o_pos, ctx);	/* NOTE(review): result is ignored */
+
+		cpt_release_buf(ctx);
+		/* Datagram socket without a peer: link it to the peer recorded in the image, if any. */
+		if (sk->sk_type == SOCK_DGRAM && unix_peer(sk) == NULL) {
+			cpt_object_t *pobj;
+
+			sbuf = cpt_get_buf(ctx);
+			err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
+			if (err) {
+				cpt_release_buf(ctx);
+				return err;
+			}
+
+			if (sbuf->cpt_peer != -1) {
+				pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, sbuf->cpt_peer, ctx);
+				if (pobj) {
+					peer = pobj->o_obj;
+					sock_hold(peer);	/* reference held on behalf of unix_peer(sk) */
+					unix_peer(sk) = peer;
+				}
+			}
+			cpt_release_buf(ctx);
+		}
+	}
+	/* NOTE(review): the result of rst_orphans() is discarded; this function returns 0 regardless. */
+	rst_orphans(ctx);
+
+	return 0;
+}
+
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_socket_in.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_socket_in.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_socket_in.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_socket_in.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,494 @@
+/*
+ *
+ *  kernel/cpt/rst_socket_in.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <linux/jhash.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/ipv6.h>
+#include <linux/igmp.h>
+#include <net/addrconf.h>
+#include <net/inet6_connection_sock.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_socket.h"
+#include "cpt_kernel.h"
+
+static inline unsigned long jiffies_import(__u32 tmo)
+{
+	/* Reinterpret tmo as signed 32-bit so it is sign-extended before the add. */
+	return jiffies + (long)(__s32)tmo;
+}
+
+static inline __u32 tcp_jiffies_import(__u32 tmo)
+{
+	return tmo + (__u32)jiffies;	/* 32-bit sum of the saved value and the low bits of jiffies */
+}
+
+
+static int restore_queues(struct sock *sk, struct cpt_sock_image *si,
+			  loff_t pos, struct cpt_context *ctx)
+{
+	/* Child records (skbs, attributes) fill [pos + cpt_hdrlen, pos + cpt_next). */
+	loff_t endpos = pos + si->cpt_next;
+
+	pos = pos + si->cpt_hdrlen;
+	while (pos < endpos) {
+		struct sk_buff *skb;
+		__u32 type;
+
+		skb = rst_skb(&pos, NULL, &type, ctx);
+		if (IS_ERR(skb)) {
+			int err = PTR_ERR(skb);
+
+			/* Not an skb: try a socket attribute, then go on. */
+			if (err == -EINVAL)
+				err = rst_sock_attr(&pos, sk, ctx);
+			if (err)
+				return err;
+			continue;
+		}
+
+		if (sk->sk_type == SOCK_STREAM) {
+			if (type == CPT_SKB_RQ) {
+				sk_stream_set_owner_r(skb, sk);
+				ub_tcprcvbuf_charge_forced(sk, skb);
+				skb_queue_tail(&sk->sk_receive_queue, skb);
+			} else if (type == CPT_SKB_OFOQ) {
+				struct tcp_sock *tp = tcp_sk(sk);
+				sk_stream_set_owner_r(skb, sk);
+				ub_tcprcvbuf_charge_forced(sk, skb);
+				skb_queue_tail(&tp->out_of_order_queue, skb);
+			} else if (type == CPT_SKB_WQ) {
+				sk->sk_wmem_queued += skb->truesize;
+				sk->sk_forward_alloc -= skb->truesize;
+				ub_tcpsndbuf_charge_forced(sk, skb);
+				skb_queue_tail(&sk->sk_write_queue, skb);
+			} else {
+				wprintk_ctx("strange stream queue type %u\n", type);
+				kfree_skb(skb);
+			}
+		} else {
+			if (type == CPT_SKB_RQ) {
+				skb_set_owner_r(skb, sk);
+				skb_queue_tail(&sk->sk_receive_queue, skb);
+			} else if (type == CPT_SKB_WQ) {
+				struct inet_sock *inet = inet_sk(sk);
+				if (inet->cork.fragsize) {
+					skb_set_owner_w(skb, sk);
+					skb_queue_tail(&sk->sk_write_queue, skb);
+				} else {
+					eprintk_ctx("cork skb is dropped\n");
+					kfree_skb(skb);
+				}
+			} else {
+				wprintk_ctx("strange dgram queue type %u\n", type);
+				kfree_skb(skb);
+			}
+		}
+	}
+	return 0;
+}
+
+static struct sock *find_parent(__u16 sport, cpt_context_t *ctx)
+{
+	cpt_object_t *obj;
+	for_each_object(obj, CPT_OBJ_SOCKET) {
+		struct sock *lsk = obj->o_obj;
+		if (!lsk || lsk->sk_state != TCP_LISTEN)
+			continue;	/* only restored listening sockets qualify */
+		if ((lsk->sk_family == AF_INET || lsk->sk_family == AF_INET6) &&
+		    inet_sk(lsk)->sport == sport)
+			return lsk;	/* first inet listener on sport wins */
+	}
+	return NULL;
+}
+
+static int rst_socket_tcp(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
+			  struct cpt_context *ctx)
+{
+	struct tcp_sock *tp = tcp_sk(sk);	/* receives the TCP state saved in si */
+	struct sk_buff *skb;
+	tp->pred_flags = si->cpt_pred_flags;
+	tp->rcv_nxt = si->cpt_rcv_nxt;
+	tp->snd_nxt = si->cpt_snd_nxt;
+	tp->snd_una = si->cpt_snd_una;
+	tp->snd_sml = si->cpt_snd_sml;
+	tp->rcv_tstamp = tcp_jiffies_import(si->cpt_rcv_tstamp);
+	tp->lsndtime = tcp_jiffies_import(si->cpt_lsndtime);
+	tp->tcp_header_len = si->cpt_tcp_header_len;
+	inet_csk(sk)->icsk_ack.pending = si->cpt_ack_pending;
+	inet_csk(sk)->icsk_ack.quick = si->cpt_quick;
+	inet_csk(sk)->icsk_ack.pingpong = si->cpt_pingpong;
+	inet_csk(sk)->icsk_ack.blocked = si->cpt_blocked;
+	inet_csk(sk)->icsk_ack.ato = si->cpt_ato;
+	inet_csk(sk)->icsk_ack.timeout = jiffies_import(si->cpt_ack_timeout);
+	inet_csk(sk)->icsk_ack.lrcvtime = tcp_jiffies_import(si->cpt_lrcvtime);
+	inet_csk(sk)->icsk_ack.last_seg_size = si->cpt_last_seg_size;
+	inet_csk(sk)->icsk_ack.rcv_mss = si->cpt_rcv_mss;
+	tp->snd_wl1 = si->cpt_snd_wl1;
+	tp->snd_wnd = si->cpt_snd_wnd;
+	tp->max_window = si->cpt_max_window;
+	inet_csk(sk)->icsk_pmtu_cookie = si->cpt_pmtu_cookie;
+	tp->mss_cache = si->cpt_mss_cache;
+	tp->rx_opt.mss_clamp = si->cpt_mss_clamp;
+	inet_csk(sk)->icsk_ext_hdr_len = si->cpt_ext_header_len;
+	inet_csk(sk)->icsk_ca_state = si->cpt_ca_state;
+	inet_csk(sk)->icsk_retransmits = si->cpt_retransmits;
+	tp->reordering = si->cpt_reordering;
+	tp->frto_counter = si->cpt_frto_counter;
+	tp->frto_highmark = si->cpt_frto_highmark;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
+	// // tp->adv_cong = si->cpt_adv_cong;
+#endif
+	inet_csk(sk)->icsk_accept_queue.rskq_defer_accept = si->cpt_defer_accept;
+	inet_csk(sk)->icsk_backoff = si->cpt_backoff;
+	tp->srtt = si->cpt_srtt;
+	tp->mdev = si->cpt_mdev;
+	tp->mdev_max = si->cpt_mdev_max;
+	tp->rttvar = si->cpt_rttvar;
+	tp->rtt_seq = si->cpt_rtt_seq;
+	inet_csk(sk)->icsk_rto = si->cpt_rto;
+	tp->packets_out = si->cpt_packets_out;
+	tp->left_out = si->cpt_left_out;
+	tp->retrans_out = si->cpt_retrans_out;
+	tp->lost_out = si->cpt_lost_out;
+	tp->sacked_out = si->cpt_sacked_out;
+	tp->fackets_out = si->cpt_fackets_out;
+	tp->snd_ssthresh = si->cpt_snd_ssthresh;
+	tp->snd_cwnd = si->cpt_snd_cwnd;
+	tp->snd_cwnd_cnt = si->cpt_snd_cwnd_cnt;
+	tp->snd_cwnd_clamp = si->cpt_snd_cwnd_clamp;
+	tp->snd_cwnd_used = si->cpt_snd_cwnd_used;
+	tp->snd_cwnd_stamp = tcp_jiffies_import(si->cpt_snd_cwnd_stamp);
+	inet_csk(sk)->icsk_timeout = tcp_jiffies_import(si->cpt_timeout);
+	tp->rcv_wnd = si->cpt_rcv_wnd;
+	tp->rcv_wup = si->cpt_rcv_wup;
+	tp->write_seq = si->cpt_write_seq;
+	tp->pushed_seq = si->cpt_pushed_seq;
+	tp->copied_seq = si->cpt_copied_seq;
+	tp->rx_opt.tstamp_ok = si->cpt_tstamp_ok;
+	tp->rx_opt.wscale_ok = si->cpt_wscale_ok;
+	tp->rx_opt.sack_ok = si->cpt_sack_ok;
+	tp->rx_opt.saw_tstamp = si->cpt_saw_tstamp;
+	tp->rx_opt.snd_wscale = si->cpt_snd_wscale;
+	tp->rx_opt.rcv_wscale = si->cpt_rcv_wscale;
+	tp->nonagle = si->cpt_nonagle;
+	tp->keepalive_probes = si->cpt_keepalive_probes;
+	tp->rx_opt.rcv_tsval = si->cpt_rcv_tsval;
+	tp->rx_opt.rcv_tsecr = si->cpt_rcv_tsecr;
+	tp->rx_opt.ts_recent = si->cpt_ts_recent;
+	tp->rx_opt.ts_recent_stamp = si->cpt_ts_recent_stamp;
+	tp->rx_opt.user_mss = si->cpt_user_mss;
+	tp->rx_opt.dsack = si->cpt_dsack;
+	tp->rx_opt.eff_sacks = si->cpt_num_sacks;
+	tp->duplicate_sack[0].start_seq = si->cpt_sack_array[0];
+	tp->duplicate_sack[0].end_seq = si->cpt_sack_array[1];
+	tp->selective_acks[0].start_seq = si->cpt_sack_array[2];
+	tp->selective_acks[0].end_seq = si->cpt_sack_array[3];
+	tp->selective_acks[1].start_seq = si->cpt_sack_array[4];
+	tp->selective_acks[1].end_seq = si->cpt_sack_array[5];
+	tp->selective_acks[2].start_seq = si->cpt_sack_array[6];
+	tp->selective_acks[2].end_seq = si->cpt_sack_array[7];
+	tp->selective_acks[3].start_seq = si->cpt_sack_array[8];
+	tp->selective_acks[3].end_seq = si->cpt_sack_array[9];
+
+	tp->window_clamp = si->cpt_window_clamp;
+	tp->rcv_ssthresh = si->cpt_rcv_ssthresh;
+	inet_csk(sk)->icsk_probes_out = si->cpt_probes_out;
+	tp->rx_opt.num_sacks = si->cpt_num_sacks;
+	tp->advmss = si->cpt_advmss;
+	inet_csk(sk)->icsk_syn_retries = si->cpt_syn_retries;
+	tp->ecn_flags = si->cpt_ecn_flags;
+	tp->prior_ssthresh = si->cpt_prior_ssthresh;
+	tp->high_seq = si->cpt_high_seq;
+	tp->retrans_stamp = si->cpt_retrans_stamp;
+	tp->undo_marker = si->cpt_undo_marker;
+	tp->undo_retrans = si->cpt_undo_retrans;
+	tp->urg_seq = si->cpt_urg_seq;
+	tp->urg_data = si->cpt_urg_data;
+	inet_csk(sk)->icsk_pending = si->cpt_pending;
+	tp->urg_mode = si->cpt_urg_mode;
+	tp->snd_up = si->cpt_snd_up;
+	tp->keepalive_time = si->cpt_keepalive_time;
+	tp->keepalive_intvl = si->cpt_keepalive_intvl;
+	tp->linger2 = si->cpt_linger2;
+	/* Point sk_send_head at the first queued skb whose seq is not before snd_nxt. */
+	sk->sk_send_head = NULL;
+	for (skb = skb_peek(&sk->sk_write_queue);
+	     skb && skb != (struct sk_buff*)&sk->sk_write_queue;
+	     skb = skb->next) {
+		if (!after(tp->snd_nxt, TCP_SKB_CB(skb)->seq)) {
+			sk->sk_send_head = skb;
+			break;
+		}
+	}
+	/* Neither closed nor listening: take a local port, hash the socket, re-arm timers. */
+	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) {
+		struct inet_sock *inet = inet_sk(sk);
+		if (inet->num == 0) {
+			cpt_object_t *lobj = NULL;
+
+			if ((int)si->cpt_parent != -1)
+				lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
+
+			if (lobj && lobj->o_obj) {
+				inet->num = ntohs(inet->sport);
+				local_bh_disable();
+				__inet_inherit_port(&tcp_hashinfo, lobj->o_obj, sk);
+				local_bh_enable();
+				dprintk_ctx("port inherited from parent\n");
+			} else {
+				struct sock *lsk = find_parent(inet->sport, ctx);
+				if (lsk) {
+					inet->num = ntohs(inet->sport);
+					local_bh_disable();
+					__inet_inherit_port(&tcp_hashinfo, lsk, sk);
+					local_bh_enable();
+					dprintk_ctx("port inherited\n");
+				} else {
+					eprintk_ctx("we are kinda lost...\n");
+				}
+			}
+		}
+		/* NOTE(review): also reached after "we are kinda lost", with inet->num still 0. */
+		sk->sk_prot->hash(sk);
+
+		if (inet_csk(sk)->icsk_ack.pending&ICSK_ACK_TIMER)
+			sk_reset_timer(sk, &inet_csk(sk)->icsk_delack_timer, inet_csk(sk)->icsk_ack.timeout);
+		if (inet_csk(sk)->icsk_pending)
+			sk_reset_timer(sk, &inet_csk(sk)->icsk_retransmit_timer,
+				       inet_csk(sk)->icsk_timeout);
+		if (sock_flag(sk, SOCK_KEEPOPEN)) {
+			unsigned long expires = jiffies_import(si->cpt_ka_timeout);
+			if (time_after(jiffies, expires))
+				expires = jiffies + HZ;
+			sk_reset_timer(sk, &sk->sk_timer, expires);
+		}
+	}
+
+	return 0;
+}
+
+
+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
+		  struct cpt_context *ctx)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	/* Load inet/UDP/IPv6 state, queues and TCP state from si under the socket lock. */
+	lock_sock(sk);
+
+	sk->sk_state = si->cpt_state;
+
+	inet->daddr = si->cpt_daddr;
+	inet->dport = si->cpt_dport;
+	inet->saddr = si->cpt_saddr;
+	inet->rcv_saddr = si->cpt_rcv_saddr;
+	inet->sport = si->cpt_sport;
+	inet->uc_ttl = si->cpt_uc_ttl;
+	inet->tos = si->cpt_tos;
+	inet->cmsg_flags = si->cpt_cmsg_flags;
+	inet->mc_index = si->cpt_mc_index;
+	inet->mc_addr = si->cpt_mc_addr;
+	inet->hdrincl = si->cpt_hdrincl;
+	inet->mc_ttl = si->cpt_mc_ttl;
+	inet->mc_loop = si->cpt_mc_loop;
+	inet->pmtudisc = si->cpt_pmtudisc;
+	inet->recverr = si->cpt_recverr;
+	inet->freebind = si->cpt_freebind;
+	inet->id = si->cpt_idcounter;
+	/* Cork state; a non-zero fragsize also needs its route looked up again. */
+	inet->cork.flags = si->cpt_cork_flags;
+	inet->cork.fragsize = si->cpt_cork_fragsize;
+	inet->cork.length = si->cpt_cork_length;
+	inet->cork.addr = si->cpt_cork_addr;
+	inet->cork.fl.fl4_src = si->cpt_cork_saddr;
+	inet->cork.fl.fl4_dst = si->cpt_cork_daddr;
+	inet->cork.fl.oif = si->cpt_cork_oif;
+	if (inet->cork.fragsize) {
+		if (ip_route_output_key(&inet->cork.rt, &inet->cork.fl)) {
+			eprintk_ctx("failed to restore cork route\n");
+			inet->cork.fragsize = 0;
+		}
+	}
+
+	if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) {
+		struct udp_sock *up = udp_sk(sk);
+		up->pending = si->cpt_udp_pending;
+		up->corkflag = si->cpt_udp_corkflag;
+		up->encap_type = si->cpt_udp_encap;
+		up->len = si->cpt_udp_len;
+	}
+
+	if (sk->sk_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		memcpy(&np->saddr, si->cpt_saddr6, 16);
+		memcpy(&np->rcv_saddr, si->cpt_rcv_saddr6, 16);
+		memcpy(&np->daddr, si->cpt_daddr6, 16);
+		np->flow_label = si->cpt_flow_label6;
+		np->frag_size = si->cpt_frag_size6;
+		np->hop_limit = si->cpt_hop_limit6;
+		np->mcast_hops = si->cpt_mcast_hops6;
+		np->mcast_oif = si->cpt_mcast_oif6;
+		np->rxopt.all = si->cpt_rxopt6;
+		np->mc_loop = si->cpt_mc_loop6;
+		np->recverr = si->cpt_recverr6;
+		np->sndflow = si->cpt_sndflow6;
+		np->pmtudisc = si->cpt_pmtudisc6;
+		np->ipv6only = si->cpt_ipv6only6;
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		if (si->cpt_mapped) {
+			extern struct inet_connection_sock_af_ops ipv6_mapped;
+			if (sk->sk_type == SOCK_STREAM &&
+			    sk->sk_protocol == IPPROTO_TCP) {
+				inet_csk(sk)->icsk_af_ops = &ipv6_mapped;
+				sk->sk_backlog_rcv = tcp_v4_do_rcv;
+			}
+		}
+#endif
+	}
+	/* NOTE(review): the results of restore_queues() and rst_socket_tcp() are ignored. */
+	restore_queues(sk, si, pos, ctx);
+
+	if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP)
+		rst_socket_tcp(si, pos, sk, ctx);
+
+	release_sock(sk);
+	return 0;
+}
+
+int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *ctx)
+{
+	struct request_sock *req;
+	/* Queue sk on listener lsk through a fresh request_sock; -EINVAL if lsk is not listening. */
+	if (lsk->sk_state != TCP_LISTEN)
+		return -EINVAL;
+
+	req = reqsk_alloc(&tcp_request_sock_ops);
+	if (!req)
+		return -ENOMEM;
+	/* sk is detached from any struct socket before it is queued. */
+	sk->sk_socket = NULL;
+	sk->sk_sleep = NULL;
+	inet_csk_reqsk_queue_add(lsk, req, sk);
+	return 0;
+}
+
+static __inline__ u32 __tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
+{
+	return jhash_2words(raddr, (u32)rport, rnd) & (TCP_SYNQ_HSIZE - 1);	/* hash masked to table size */
+}
+
+int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si,
+			      loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	loff_t end = si->cpt_next;	/* NOTE(review): a length, yet compared with offset pos below -- confirm */
+	/* Re-create the open requests stored as children of socket image si on sk. */
+	pos += si->cpt_hdrlen;
+	while (pos < end) {
+		struct cpt_openreq_image oi;
+		/* A record that is not an open request is tried as a socket attribute. */
+		err = rst_get_object(CPT_OBJ_OPENREQ, pos, &oi, ctx);
+		if (err) {
+			err = rst_sock_attr(&pos, sk, ctx);
+			if (err)
+				return err;
+			continue;
+		}
+
+		if (oi.cpt_object == CPT_OBJ_OPENREQ) {
+			struct request_sock *req = reqsk_alloc(&tcp_request_sock_ops);
+			if (req == NULL)
+				return -ENOMEM;
+			/* NOTE(review): this also clears whatever reqsk_alloc() set up in *req -- confirm. */
+			memset(req, 0, sizeof(*req));
+			tcp_rsk(req)->rcv_isn = oi.cpt_rcv_isn;
+			tcp_rsk(req)->snt_isn = oi.cpt_snt_isn;
+			inet_rsk(req)->rmt_port = oi.cpt_rmt_port;
+			req->mss = oi.cpt_mss;
+			req->retrans = oi.cpt_retrans;
+			inet_rsk(req)->snd_wscale = oi.cpt_snd_wscale;
+			inet_rsk(req)->rcv_wscale = oi.cpt_rcv_wscale;
+			inet_rsk(req)->tstamp_ok = oi.cpt_tstamp_ok;
+			inet_rsk(req)->sack_ok = oi.cpt_sack_ok;
+			inet_rsk(req)->wscale_ok = oi.cpt_wscale_ok;
+			inet_rsk(req)->ecn_ok = oi.cpt_ecn_ok;
+			inet_rsk(req)->acked = oi.cpt_acked;
+			req->window_clamp = oi.cpt_window_clamp;
+			req->rcv_wnd = oi.cpt_rcv_wnd;
+			req->ts_recent = oi.cpt_ts_recent;
+			req->expires = jiffies_import(oi.cpt_expires);
+
+			if (oi.cpt_family == AF_INET) {
+				memcpy(&inet_rsk(req)->loc_addr, oi.cpt_loc_addr, 4);
+				memcpy(&inet_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 4);
+				inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+			} else {
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+				memcpy(&inet6_rsk(req)->loc_addr, oi.cpt_loc_addr, 16);
+				memcpy(&inet6_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 16);
+				inet6_rsk(req)->iif = oi.cpt_iif;
+				inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+#endif
+			}
+		}
+		pos += oi.cpt_next;
+	}
+	return 0;
+}
+
+int rst_sk_mcfilter_in(struct sock *sk, struct cpt_sockmc_image *v,
+		       loff_t pos, cpt_context_t *ctx)
+{
+	struct ip_mreqn imr;
+	/* A non-zero mode or any data behind the header is rejected with -EINVAL. */
+	if (v->cpt_mode || v->cpt_next != v->cpt_hdrlen) {
+		eprintk_ctx("IGMPv3 is still not supported\n");
+		return -EINVAL;
+	}
+	/* Re-join the saved IPv4 group on the saved interface index. */
+	memset(&imr, 0, sizeof(imr));
+	imr.imr_ifindex = v->cpt_ifindex;
+	imr.imr_multiaddr.s_addr = v->cpt_mcaddr[0];
+	return ip_mc_join_group(sk, &imr);
+}
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+int rst_sk_mcfilter_in6(struct sock *sk, struct cpt_sockmc_image *v,
+			loff_t pos, cpt_context_t *ctx)
+{
+	/* IPv6 variant: same checks, then join the group stored in cpt_mcaddr. */
+	if (v->cpt_mode || v->cpt_next != v->cpt_hdrlen) {
+		eprintk_ctx("IGMPv3 is still not supported\n");
+		return -EINVAL;
+	}
+
+	return ipv6_sock_mc_join(sk, v->cpt_ifindex,
+				 (struct in6_addr*)v->cpt_mcaddr);
+}
+#endif
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_sysvipc.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_sysvipc.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_sysvipc.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_sysvipc.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,409 @@
+/*
+ *
+ *  kernel/cpt/rst_sysvipc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/shm.h>
+/* FIXME. x86_64 has asm/ipc.h forgotten? */
+#include <asm-generic/ipc.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <ub/ub_mem.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_kernel.h"
+
+struct _warg {
+		struct file		*file;	/* shm file that fixup_one_shm() looks for */
+		struct cpt_sysvshm_image	*v;	/* image the attributes are copied from */
+};
+
+static int fixup_one_shm(struct shmid_kernel *shp, void *arg)
+{
+	struct _warg *warg = arg;
+	/* sysvipc_walk_shm() callback: 0 = other segment, -EEXIST = attached, 1 = patched. */
+	if (shp->shm_file != warg->file)
+		return 0;
+	if (shp->shm_nattch)
+		return -EEXIST;
+	/* Matching, unattached segment: take ownership, mode, times and pids from the image. */
+	shp->shm_perm.uid = warg->v->cpt_uid;
+	shp->shm_perm.gid = warg->v->cpt_gid;
+	shp->shm_perm.cuid = warg->v->cpt_cuid;
+	shp->shm_perm.cgid = warg->v->cpt_cgid;
+	shp->shm_perm.mode = warg->v->cpt_mode;
+
+	shp->shm_atim = warg->v->cpt_atime;
+	shp->shm_dtim = warg->v->cpt_dtime;
+	shp->shm_ctim = warg->v->cpt_ctime;
+	shp->shm_cprid = warg->v->cpt_creator;
+	shp->shm_lprid = warg->v->cpt_last;
+
+	/* TODO: fix shp->mlock_user? */
+	return 1;
+}
+
+static int fixup_shm(struct file *file, struct cpt_sysvshm_image *v)
+{
+	/* Walk the SysV shm segments and patch the one backed by file with
+	 * the attributes saved in v; the result is whatever the walk returns
+	 * (fixup_one_shm() yields 1 on a fix-up, -EEXIST if still attached). */
+	struct _warg warg = { .file = file, .v = v };
+
+	return sysvipc_walk_shm(fixup_one_shm, &warg);
+}
+
+static int fixup_shm_data(struct file *file, loff_t pos, loff_t end,
+			  struct cpt_context *ctx)
+{
+	struct cpt_page_block pgb;
+	ssize_t (*do_write)(struct file *, const char __user *, size_t, loff_t *ppos);
+	/* Copy the CPT_OBJ_PAGES blocks found in [pos, end) into file through its ->write(). */
+	do_write = file->f_dentry->d_inode->i_fop->write;
+	if (do_write == NULL) {
+		eprintk_ctx("No TMPFS? Cannot restore content of SYSV SHM\n");
+		return -EINVAL;
+	}
+
+	while (pos < end) {
+		loff_t opos;
+		loff_t ipos;
+		int count;
+		int err;
+		/* Each block header gives the target byte range cpt_start..cpt_end in file. */
+		err = rst_get_object(CPT_OBJ_PAGES, pos, &pgb, ctx);
+		if (err)
+			return err;
+		dprintk_ctx("restoring SHM block: %08x-%08x\n",
+		       (__u32)pgb.cpt_start, (__u32)pgb.cpt_end);
+		ipos = pos + pgb.cpt_hdrlen;
+		opos = pgb.cpt_start;
+		count = pgb.cpt_end-pgb.cpt_start;
+		while (count > 0) {
+			mm_segment_t oldfs;
+			int copy = count;
+			/* Move at most one page per step through ctx->tmpbuf. */
+			if (copy > PAGE_SIZE)
+				copy = PAGE_SIZE;
+			(void)cpt_get_buf(ctx);
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos);
+			set_fs(oldfs);
+			if (err) {
+				__cpt_release_buf(ctx);
+				return err;
+			}
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			ipos += copy;
+			err = do_write(file, ctx->tmpbuf, copy, &opos);
+			set_fs(oldfs);
+			__cpt_release_buf(ctx);
+			if (err != copy) {
+				eprintk_ctx("write() failure\n");
+				if (err >= 0)
+					err = -EIO;
+				return err;
+			}
+			count -= copy;
+		}
+		pos += pgb.cpt_next;
+	}
+	return 0;
+}
+
+struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx)
+{
+	struct file *file;
+	int err;
+	loff_t dpos, epos;
+	union {
+		struct cpt_file_image		fi;
+		struct cpt_sysvshm_image	shmi;
+		struct cpt_inode_image 		ii;
+	} u;
+	/* Read file -> inode -> SysV shm images, set the segment up, then fill it. */
+	err = rst_get_object(CPT_OBJ_FILE, pos, &u.fi, ctx);
+	if (err < 0)
+		goto err_out;
+	pos = u.fi.cpt_inode;
+	err = rst_get_object(CPT_OBJ_INODE, pos, &u.ii, ctx);
+	if (err < 0)
+		goto err_out;
+	dpos = pos + u.ii.cpt_hdrlen;	/* taken now: reading u.shmi overwrites u.ii */
+	epos = pos + u.ii.cpt_next;
+	err = rst_get_object(CPT_OBJ_SYSV_SHM, dpos, &u.shmi, ctx);
+	if (err < 0)
+		goto err_out;
+	dpos += u.shmi.cpt_next;
+	file = sysvipc_setup_shm(u.shmi.cpt_key, u.shmi.cpt_id,
+				 u.shmi.cpt_segsz, u.shmi.cpt_mode);
+	if (IS_ERR(file))
+		return file;
+	err = fixup_shm(file, &u.shmi);
+	if (err == -EEXIST || dpos >= epos)	/* segment already attached, or no data */
+		return file;
+	err = fixup_shm_data(file, dpos, epos, ctx);
+	if (!err)
+		return file;
+	fput(file);	/* content is incomplete: fail instead of returning it */
+err_out:
+	return ERR_PTR(err);
+}
+
+static int attach_one_undo(int semid, struct sem_array *sma, void *arg)
+{
+	struct sem_undo *su = arg;
+	struct sem_undo_list *undo_list = current->sysvsem.undo_list;
+	/* sysvipc_walk_sem() callback: link su to the set whose id matches su->semid. */
+	if (semid != su->semid)
+		return 0;
+	/* Push su onto the head of current's undo list ... */
+	su->proc_next = undo_list->proc_list;
+	undo_list->proc_list = su;
+	/* ... and onto the head of the undo list of the semaphore set. */
+	su->id_next = sma->undo;
+	sma->undo = su;
+
+	return 1;
+}
+
+static int attach_undo(struct sem_undo *su)
+{
+	return sysvipc_walk_sem(attach_one_undo, su);	/* attach_one_undo() returns 1 once su is linked */
+}
+
+static int do_rst_semundo(struct cpt_object_hdr *sui, loff_t pos, struct cpt_context *ctx)
+{
+	int err;
+	struct sem_undo_list *undo_list;
+	/* Build current's semaphore undo list from the undo records stored behind sui. */
+	if (current->sysvsem.undo_list) {
+		eprintk_ctx("Funny undo_list\n");
+		return 0;
+	}
+	/* Fresh, zeroed list with one reference, installed on current right away. */
+	undo_list = ub_kmalloc(sizeof(struct sem_undo_list), GFP_KERNEL);
+	if (undo_list == NULL)
+		return -ENOMEM;
+	memset(undo_list, 0, sizeof(struct sem_undo_list));
+	atomic_set(&undo_list->refcnt, 1);
+	spin_lock_init(&undo_list->lock);
+	current->sysvsem.undo_list = undo_list;
+	/* Each child record carries one semaphore id plus cpt_nsem short adjustments. */
+	if (sui->cpt_next > sui->cpt_hdrlen) {
+		loff_t offset = pos + sui->cpt_hdrlen;
+		do {
+			struct sem_undo *new;
+			struct cpt_sysvsem_undo_image spi;
+			err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO_REC, offset, &spi, ctx);
+			if (err)
+				goto out;
+			new = ub_kmalloc(sizeof(struct sem_undo) +
+					 sizeof(short)*spi.cpt_nsem, GFP_KERNEL);
+			if (!new) {
+				err = -ENOMEM;
+				goto out;
+			}
+			/* The semadj array lives directly behind the sem_undo header. */
+			memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*spi.cpt_nsem);
+			new->semadj = (short *) &new[1];
+			new->semid = spi.cpt_id;
+			err = ctx->pread(new->semadj, spi.cpt_nsem*sizeof(short), ctx, offset + spi.cpt_hdrlen);
+			if (err) {
+				kfree(new);
+				goto out;
+			}
+			err = attach_undo(new);
+			if (err <= 0) {
+				if (err == 0)
+					err = -ENOENT;
+				kfree(new);
+				goto out;
+			}
+			offset += spi.cpt_next;
+		} while (offset < pos + sui->cpt_next);
+	}
+	err = 0;
+
+out:
+	return err;
+}
+
+__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	__u32 flag = 0;
+	/* The CLONE_SYSVSEM decision below is compiled out, so this always returns 0. */
+#if 0
+	if (ti->cpt_sysvsem_undo == CPT_NULL ||
+	    lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo))
+		flag |= CLONE_SYSVSEM;
+#endif
+	return flag;
+}
+
+int rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	int err;
+	struct sem_undo_list *f = current->sysvsem.undo_list;
+	cpt_object_t *obj;
+	struct cpt_object_hdr sui;
+	/* Give current the semaphore undo list that its task image ti refers to. */
+	if (ti->cpt_sysvsem_undo == CPT_NULL) {
+		exit_sem(current);
+		return 0;
+	}
+	/* Already restored for another task: share that list. */
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, ctx);
+	if (obj) {
+		if (obj->o_obj != f) {
+			exit_sem(current);
+			f = obj->o_obj;
+			atomic_inc(&f->refcnt);
+			current->sysvsem.undo_list = f;
+		}
+		return 0;
+	}
+	/* First user: build the list from the image. */
+	if ((err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, &sui, ctx)) != 0)
+		goto out;
+
+	if ((err = do_rst_semundo(&sui, ti->cpt_sysvsem_undo, ctx)) != 0)
+		goto out;
+
+	/* Publish the list that was just built so that tasks sharing this undo
+	 * image attach to it; f still holds the value from before the restore. */
+	f = current->sysvsem.undo_list;
+	obj = cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, f, ctx);
+	if (obj)
+		cpt_obj_setpos(obj, ti->cpt_sysvsem_undo, ctx);
+	else
+		err = -ENOMEM;
+
+out:
+	return err;
+}
+
+struct _sarg {
+	int semid;	/* id of the set that fixup_one_sem() looks for */
+	struct cpt_sysvsem_image	*v;	/* image the attributes are copied from */
+	__u32				*arr;	/* raw data copied over sma->sem_base */
+};
+
+static int fixup_one_sem(int semid, struct sem_array *sma, void *arg)
+{
+	struct _sarg *warg = arg;
+	/* sysvipc_walk_sem() callback: returns 1 after patching set semid, 0 for other sets. */
+	if (semid != warg->semid)
+		return 0;
+
+	sma->sem_perm.uid = warg->v->cpt_uid;
+	sma->sem_perm.gid = warg->v->cpt_gid;
+	sma->sem_perm.cuid = warg->v->cpt_cuid;
+	sma->sem_perm.cgid = warg->v->cpt_cgid;
+	sma->sem_perm.mode = warg->v->cpt_mode;
+	sma->sem_perm.seq = warg->v->cpt_seq;
+	/* NOTE(review): the copy assumes 8 bytes per semaphore and that arr covers sem_nsems entries -- confirm. */
+	sma->sem_ctime = warg->v->cpt_ctime;
+	sma->sem_otime = warg->v->cpt_otime;
+	memcpy(sma->sem_base, warg->arr, sma->sem_nsems*8);
+	return 1;
+}
+
+static int fixup_sem(int semid, struct cpt_sysvsem_image *v, __u32 *arr)
+{
+	/* Walk the SysV semaphore sets and let fixup_one_sem() patch the set
+	 * whose id is semid with the attributes in v and the data in arr.
+	 * The walk result is returned unchanged; fixup_one_sem() reports 1
+	 * for the set it patched and 0 for every other set. */
+	struct _sarg warg = { .semid = semid, .v = v, .arr = arr };
+
+	return sysvipc_walk_sem(fixup_one_sem, &warg);
+}
+
+
+static int restore_sem(loff_t pos, struct cpt_sysvsem_image *si,
+		       struct cpt_context *ctx)
+{
+	int err;
+	__u32 *arr;
+	int nsems = (si->cpt_next - si->cpt_hdrlen)/8;
+	/* The payload behind the header is read as nsems records of 8 bytes each. */
+	arr = kmalloc(nsems*8, GFP_KERNEL);
+	if (!arr)
+		return -ENOMEM;
+	/* Read the payload, set the semaphore set up, then patch attributes and data. */
+	err = ctx->pread(arr, nsems*8, ctx, pos+si->cpt_hdrlen);
+	if (err)
+		goto out;
+	err = sysvipc_setup_sem(si->cpt_key, si->cpt_id, nsems, si->cpt_mode);
+	if (err < 0) {
+		eprintk_ctx("SEM 3\n");
+		goto out;
+	}
+	err = fixup_sem(si->cpt_id, si, arr);
+	if (err == 0)
+		err = -ESRCH;
+	if (err > 0)
+		err = 0;
+out:
+	kfree(arr);
+	return err;
+}
+
+static int rst_sysv_sem(struct cpt_context *ctx)
+{
+	struct cpt_sysvsem_image si;
+	struct cpt_section_hdr hdr;
+	loff_t cur = ctx->sections[CPT_SECT_SYSV_SEM];
+	loff_t limit;
+	int err;
+
+	/* No SysV semaphore section recorded in the image:
+	 * there is nothing to restore. */
+	if (cur == CPT_NULL)
+		return 0;
+
+	err = ctx->pread(&hdr, sizeof(hdr), ctx, cur);
+	if (err)
+		return err;
+	if (hdr.cpt_section != CPT_SECT_SYSV_SEM || hdr.cpt_hdrlen < sizeof(hdr))
+		return -EINVAL;
+
+	/* Restore each semaphore set stored in the section, in image order. */
+	limit = cur + hdr.cpt_next;
+	for (cur += hdr.cpt_hdrlen; cur < limit; cur += si.cpt_next) {
+		err = rst_get_object(CPT_OBJ_SYSV_SEM, cur, &si, ctx);
+		if (err)
+			return err;
+		err = restore_sem(cur, &si, ctx);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+int rst_sysv_ipc(struct cpt_context *ctx)
+{
+	return rst_sysv_sem(ctx);	/* only the semaphore section is restored from here */
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_tty.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_tty.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_tty.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_tty.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,380 @@
+/*
+ *
+ *  kernel/cpt/rst_tty.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/mman.h>
+#include <linux/mount.h>
+#include <linux/tty.h>
+#include <linux/vmalloc.h>
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+#include <linux/cpt_image.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_mm.h"
+#include "cpt_files.h"
+#include "cpt_kernel.h"
+
+static int pty_setup(struct tty_struct *stty, loff_t pos,
+		     struct cpt_tty_image *pi, struct cpt_context *ctx)
+{
+	unsigned long flags;
+	/* Load the tty state saved in image pi (located at pos) into stty. */
+	stty->pgrp = -1;
+	stty->session = 0;
+	stty->packet = pi->cpt_packet;
+	stty->stopped = pi->cpt_stopped;
+	stty->hw_stopped = pi->cpt_hw_stopped;
+	stty->flow_stopped = pi->cpt_flow_stopped;
+#define DONOT_CHANGE ((1<<TTY_CHARGED)|(1<<TTY_CLOSING)|(1<<TTY_LDISC))
+	flags = stty->flags & DONOT_CHANGE;
+	stty->flags = flags | (pi->cpt_flags & ~DONOT_CHANGE);
+	stty->ctrl_status = pi->cpt_ctrl_status;
+	stty->winsize.ws_row = pi->cpt_ws_row;
+	stty->winsize.ws_col = pi->cpt_ws_col;
+	stty->winsize.ws_ypixel = pi->cpt_ws_prow;
+	stty->winsize.ws_xpixel = pi->cpt_ws_pcol;
+	stty->canon_column = pi->cpt_canon_column;
+	stty->column = pi->cpt_column;
+	stty->raw = pi->cpt_raw;
+	stty->real_raw = pi->cpt_real_raw;
+	stty->erasing = pi->cpt_erasing;
+	stty->lnext = pi->cpt_lnext;
+	stty->icanon = pi->cpt_icanon;
+	stty->closing = pi->cpt_closing;
+	stty->minimum_to_wake = pi->cpt_minimum_to_wake;
+
+	stty->termios->c_iflag = pi->cpt_c_iflag;
+	stty->termios->c_oflag = pi->cpt_c_oflag;
+	stty->termios->c_lflag = pi->cpt_c_lflag;
+	stty->termios->c_cflag = pi->cpt_c_cflag;
+	memcpy(&stty->termios->c_cc, &pi->cpt_c_cc, NCCS);
+	memcpy(stty->read_flags, pi->cpt_read_flags, sizeof(stty->read_flags));
+	/* Saved input, if any, follows as a CPT_OBJ_BITS record; it must fit read_buf. */
+	if (pi->cpt_next > pi->cpt_hdrlen) {
+		int err;
+		struct cpt_obj_bits b;
+		err = rst_get_object(CPT_OBJ_BITS, pos + pi->cpt_hdrlen, &b, ctx);
+		if (err)
+			return err;
+		if (b.cpt_size == 0)
+			return 0;
+		if (stty->read_buf == NULL || b.cpt_size > N_TTY_BUF_SIZE)
+			return -EINVAL;
+		err = ctx->pread(stty->read_buf, b.cpt_size, ctx, pos + pi->cpt_hdrlen + b.cpt_hdrlen);
+		if (err)
+			return err;
+		spin_lock_irq(&stty->read_lock);
+		stty->read_tail = 0;
+		stty->read_cnt = b.cpt_size;
+		stty->read_head = b.cpt_size & (N_TTY_BUF_SIZE - 1);
+		stty->canon_head = stty->read_tail + pi->cpt_canon_head;
+		stty->canon_data = pi->cpt_canon_data;
+		spin_unlock_irq(&stty->read_lock);
+	}
+	return 0;
+}
+
+/* Find slave/master tty in image, when we already know master/slave, and
+ * apply it to stty.  Returns its image position, or CPT_NULL if not found. */
+static loff_t find_pty_pair(struct tty_struct *stty, loff_t pos, struct cpt_tty_image *pi, struct cpt_context *ctx)
+{
+	loff_t sec = ctx->sections[CPT_SECT_TTY];
+	loff_t endsec, found = CPT_NULL;
+	struct cpt_section_hdr h;
+	struct cpt_tty_image *pibuf;
+
+	if (ctx->pread(&h, sizeof(h), ctx, sec))
+		return CPT_NULL;
+	if (h.cpt_section != CPT_SECT_TTY || h.cpt_hdrlen < sizeof(h))
+		return CPT_NULL;
+	pibuf = kmalloc(sizeof(*pibuf), GFP_KERNEL);
+	if (pibuf == NULL) {
+		eprintk_ctx("cannot allocate buffer\n");
+		return CPT_NULL;
+	}
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	/* Leave the loop with break, never with return: pibuf must be freed. */
+	while (sec < endsec) {
+		if (rst_get_object(CPT_OBJ_TTY, sec, pibuf, ctx))
+			break;
+		if (pibuf->cpt_index == pi->cpt_index &&
+		    !((pi->cpt_drv_flags^pibuf->cpt_drv_flags)&TTY_DRIVER_DEVPTS_MEM) &&
+		    pos != sec) {
+			pty_setup(stty, sec, pibuf, ctx);
+			found = sec;
+			break;
+		}
+		sec += pibuf->cpt_next;
+	}
+	kfree(pibuf);
+	return found;
+}
+
+static int fixup_tty_attrs(struct cpt_inode_image *ii, struct file *master,
+			   struct cpt_context *ctx)
+{
+	int err;
+	struct iattr newattrs;
+	struct dentry *d = master->f_dentry;
+	/* Apply the uid, gid and mode saved in inode image ii to the file's inode. */
+	newattrs.ia_valid = ATTR_UID|ATTR_GID|ATTR_MODE;
+	newattrs.ia_uid = ii->cpt_uid;
+	newattrs.ia_gid = ii->cpt_gid;
+	newattrs.ia_mode = ii->cpt_mode;
+	/* notify_change() is called with the inode's i_mutex held. */
+	mutex_lock(&d->d_inode->i_mutex);
+	err = notify_change(d, &newattrs);
+	mutex_unlock(&d->d_inode->i_mutex);
+
+	return err;
+}
+
+/* NOTE: "portable", but ugly thing. To allocate /dev/pts/N, we open
+ * /dev/ptmx until we get pty with desired index. Files opened with another
+ * index are kept (one page of pointers at most, then -EBUSY) and put at the end. */
+
+struct file *ptmx_open(int index, unsigned int flags)
+{
+	struct file *file;
+	struct file **stack = NULL;
+	int depth = 0;
+
+	for (;;) {
+		struct tty_struct *tty;
+
+		file = filp_open("/dev/ptmx", flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+		if (IS_ERR(file))
+			break;
+		tty = file->private_data;
+		if (tty->index == index)
+			break;
+		/* Not the wanted index: remember the file so it stays open until we finish. */
+		if (depth == PAGE_SIZE/sizeof(struct file *)) {
+			fput(file);
+			file = ERR_PTR(-EBUSY);
+			break;
+		}
+		if (stack == NULL) {
+			stack = (struct file **)__get_free_page(GFP_KERNEL);
+			if (!stack) {
+				fput(file);
+				file = ERR_PTR(-ENOMEM);
+				break;
+			}
+		}
+		stack[depth] = file;
+		depth++;
+	}
+	while (depth > 0) {
+		depth--;
+		fput(stack[depth]);
+	}
+	if (stack)
+		free_page((unsigned long)stack);
+	return file;
+}
+
+
+/* Restore a pty file: reopen the end already recorded for fi->cpt_priv, or
+ * open a fresh master/slave pair and register both ends in ctx.
+ * Returns the end the image describes or an ERR_PTR() value. */
+struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii,
+			   unsigned flags, struct cpt_context *ctx)
+{
+	int err;
+	cpt_object_t *obj;
+	struct file *master, *slave;
+	struct tty_struct *stty;
+	struct cpt_tty_image *pi;
+	static char *a = "pqrstuvwxyzabcde";
+	static char *b = "0123456789abcdef";
+	char pairname[32];	/* "/dev/pts/" + 7-digit index + NUL overflowed 16 */
+	unsigned master_flags, slave_flags;
+	loff_t pos;
+
+	if (fi->cpt_priv == CPT_NULL)
+		return ERR_PTR(-EINVAL);
+
+	obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, fi->cpt_priv, ctx);
+	if (obj && obj->o_parent) {
+		dprintk_ctx("obtained pty as pair to existing\n");
+		master = obj->o_parent;
+		stty = master->private_data;
+		if (stty->driver->subtype == PTY_TYPE_MASTER &&
+		    (stty->driver->flags&TTY_DRIVER_DEVPTS_MEM)) {
+			wprintk_ctx("cloning ptmx\n");
+			get_file(master);
+			return master;
+		}
+		master = dentry_open(dget(master->f_dentry),
+				     mntget(master->f_vfsmnt), flags);
+		if (!IS_ERR(master)) {
+			stty = master->private_data;
+			if (stty->driver->subtype != PTY_TYPE_MASTER)
+				fixup_tty_attrs(ii, master, ctx);
+		}
+		return master;
+	}
+
+	pi = cpt_get_buf(ctx);
+	err = rst_get_object(CPT_OBJ_TTY, fi->cpt_priv, pi, ctx);
+	if (err) {
+		cpt_release_buf(ctx);
+		return ERR_PTR(err);
+	}
+
+	master_flags = slave_flags = 0;
+	if (pi->cpt_drv_subtype == PTY_TYPE_MASTER)
+		master_flags = flags;
+	else
+		slave_flags = flags;
+
+	/* Open pair master/slave. */
+	if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM) {
+		master = ptmx_open(pi->cpt_index, master_flags);
+	} else {
+		if ((unsigned)pi->cpt_index >= 16 * 16) {	/* beyond the a[]/b[] names */
+			cpt_release_buf(ctx);
+			return ERR_PTR(-EINVAL);
+		}
+		snprintf(pairname, sizeof(pairname), "/dev/pty%c%c", a[pi->cpt_index/16], b[pi->cpt_index%16]);
+		master = filp_open(pairname, master_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+	}
+	if (IS_ERR(master)) {
+		eprintk_ctx("filp_open master: %Ld %ld\n", (long long)fi->cpt_priv, PTR_ERR(master));
+		cpt_release_buf(ctx);
+		return master;
+	}
+	stty = master->private_data;
+	clear_bit(TTY_PTY_LOCK, &stty->flags);
+	if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM)
+		snprintf(pairname, sizeof(pairname), "/dev/pts/%d", stty->index);
+	else
+		snprintf(pairname, sizeof(pairname), "/dev/tty%c%c", a[stty->index/16], b[stty->index%16]);
+	slave = filp_open(pairname, slave_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
+	if (IS_ERR(slave)) {
+		eprintk_ctx("filp_open slave %s: %ld\n", pairname, PTR_ERR(slave));
+		fput(master);
+		cpt_release_buf(ctx);
+		return slave;
+	}
+	if (pi->cpt_drv_subtype != PTY_TYPE_MASTER)
+		fixup_tty_attrs(ii, slave, ctx);
+	cpt_object_add(CPT_OBJ_TTY, master->private_data, ctx);
+	cpt_object_add(CPT_OBJ_TTY, slave->private_data, ctx);
+	cpt_object_add(CPT_OBJ_FILE, master, ctx);
+	cpt_object_add(CPT_OBJ_FILE, slave, ctx);
+
+	if (pi->cpt_drv_subtype == PTY_TYPE_MASTER) {
+		obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx);
+		obj->o_parent = master;
+		cpt_obj_setpos(obj, fi->cpt_priv, ctx);
+		pty_setup(stty, fi->cpt_priv, pi, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx);
+		obj->o_parent = slave;
+		pos = find_pty_pair(stty->link, fi->cpt_priv, pi, ctx);
+		cpt_obj_setpos(obj, pos, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_FILE, slave, ctx);
+		cpt_obj_setpos(obj, CPT_NULL, ctx);
+		get_file(master);
+		cpt_release_buf(ctx);
+		return master;
+	} else {
+		obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx);
+		obj->o_parent = slave;
+		cpt_obj_setpos(obj, fi->cpt_priv, ctx);
+		pty_setup(stty->link, fi->cpt_priv, pi, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx);
+		obj->o_parent = master;
+		pos = find_pty_pair(stty, fi->cpt_priv, pi, ctx);
+		cpt_obj_setpos(obj, pos, ctx);
+
+		obj = lookup_cpt_object(CPT_OBJ_FILE, master, ctx);
+		cpt_obj_setpos(obj, CPT_NULL, ctx);
+		get_file(slave);
+		cpt_release_buf(ctx);
+		return slave;
+	}
+}
+
+/* Restore job control (pgrp, session) of every tty restored from the image. */
+int rst_tty_jobcontrol(struct cpt_context *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_TTY];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+
+	if (sec == CPT_NULL)	/* image has no tty section: nothing to fix up */
+		return 0;
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_TTY || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		cpt_object_t *obj;
+		struct cpt_tty_image *pibuf = cpt_get_buf(ctx);
+		if (rst_get_object(CPT_OBJ_TTY, sec, pibuf, ctx)) {
+			cpt_release_buf(ctx);
+			return -EINVAL;
+		}
+		obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, sec, ctx);
+		if (obj) {
+			struct tty_struct *stty = obj->o_obj;
+			if ((int)pibuf->cpt_pgrp > 0) {
+				stty->pgrp = vpid_to_pid(pibuf->cpt_pgrp);
+				if (stty->pgrp == -1)
+					dprintk_ctx("unknown tty pgrp %d\n", pibuf->cpt_pgrp);
+			} else if (pibuf->cpt_pgrp) {
+				stty->pgrp = alloc_pidmap();
+				if (stty->pgrp < 0) {
+					eprintk_ctx("cannot allocate stray tty->pgrp\n");
+					cpt_release_buf(ctx);
+					return -EINVAL;
+				}
+				free_pidmap(stty->pgrp);
+			}
+			if ((int)pibuf->cpt_session > 0) {
+				int sess = vpid_to_pid(pibuf->cpt_session);
+				if (sess == -1) {
+					dprintk_ctx("unknown tty session %d\n", pibuf->cpt_session);
+				} else if (stty->session <= 0) {
+					stty->session = sess;
+				} else if (stty->session != sess) {
+					wprintk_ctx("tty session mismatch 2\n");
+				}
+			}
+		}
+		sec += pibuf->cpt_next;
+		cpt_release_buf(ctx);
+	}
+	return 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_ubc.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_ubc.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_ubc.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_ubc.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,136 @@
+/*
+ *
+ *  kernel/cpt/rst_ubc.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/types.h>
+#include <ub/beancounter.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+
+/* Beancounter of the UBC object at image position pos; exec ub if unknown. */
+struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx)
+{
+	cpt_object_t *ubc_obj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, pos, ctx);
+
+	if (ubc_obj != NULL)
+		return get_beancounter(ubc_obj->o_obj);
+
+	eprintk("RST: unknown ub @%Ld\n", (long long)pos);
+	return get_beancounter(get_exec_ub());
+}
+
+void copy_one_ubparm(struct ubparm *from, struct ubparm *to, int bc_parm_id)
+{
+	(to + bc_parm_id)->limit = (from + bc_parm_id)->limit;	/* only limit ... */
+	(to + bc_parm_id)->barrier = (from + bc_parm_id)->barrier; /* ... and barrier */
+}
+
+void set_one_ubparm_to_max(struct ubparm *ubprm, int bc_parm_id)
+{
+	(ubprm + bc_parm_id)->limit = UB_MAXVALUE;	/* both thresholds of the */
+	(ubprm + bc_parm_id)->barrier = UB_MAXVALUE;	/* slot become UB_MAXVALUE */
+}
+
+/* Load one ubparm from its image; CPT_NULL thresholds become UB_MAXVALUE. */
+static void restore_one_bc_parm(struct cpt_ubparm *dmp, struct ubparm *prm, int held)
+{
+	prm->failcnt = dmp->failcnt;
+	prm->minheld = dmp->minheld;
+	prm->maxheld = dmp->maxheld;
+	if (held)	/* 'held' is taken from the image only on request */
+		prm->held = dmp->held;
+	prm->limit = (dmp->limit != CPT_NULL) ? dmp->limit : UB_MAXVALUE;
+	prm->barrier = (dmp->barrier != CPT_NULL) ? dmp->barrier : UB_MAXVALUE;
+}
+
+/* Bind obj to the beancounter described by image v and load its parameters.
+ * Returns 0, -ESRCH if the parent object is unknown, -ENOMEM if bc is NULL. */
+static int restore_one_bc(struct cpt_beancounter_image *v,
+		cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct user_beancounter *bc;
+	int res;
+
+	if (v->cpt_parent == CPT_NULL) {
+		for (bc = get_exec_ub(); bc->parent; bc = bc->parent)
+			;
+		get_beancounter(bc);
+	} else {
+		cpt_object_t *parent_obj;
+
+		parent_obj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, v->cpt_parent, ctx);
+		if (!parent_obj)
+			return -ESRCH;
+		bc = get_subbeancounter_byid(parent_obj->o_obj, v->cpt_id, 1);
+	}
+	if (!bc)
+		return -ENOMEM;
+	obj->o_obj = bc;
+
+	/* parameters are not loaded when both version tests below hold */
+	if (!(ctx->image_version < CPT_VERSION_18 &&
+	      CPT_VERSION_MINOR(ctx->image_version) < 1)) {
+		for (res = 0; res < UB_RESOURCES; res++) {
+			restore_one_bc_parm(&v->cpt_parms[2 * res], &bc->ub_parms[res], 0);
+			restore_one_bc_parm(&v->cpt_parms[2 * res + 1], &bc->ub_store[res], 1);
+		}
+	}
+
+	if (bc->parent)
+		return 0;
+	/* no parent: keep a copy in ctx->saved_ubc, then raise four limits to max */
+	for (res = 0; res < UB_RESOURCES; res++)
+		copy_one_ubparm(bc->ub_parms, ctx->saved_ubc, res);
+	set_one_ubparm_to_max(bc->ub_parms, UB_KMEMSIZE);
+	set_one_ubparm_to_max(bc->ub_parms, UB_NUMPROC);
+	set_one_ubparm_to_max(bc->ub_parms, UB_NUMFILE);
+	set_one_ubparm_to_max(bc->ub_parms, UB_DCACHESIZE);
+	return 0;
+}
+
+/* Create and intern a CPT_OBJ_UBC object for every beancounter image in the
+ * CPT_SECT_UBC section.  Returns 0 or the first error encountered. */
+int rst_undump_ubc(struct cpt_context *ctx)
+{
+	loff_t start, end;
+	struct cpt_beancounter_image *v;
+	cpt_object_t *obj;
+	int err;
+
+	err = rst_get_section(CPT_SECT_UBC, ctx, &start, &end);
+	while (!err && start < end) {
+		v = cpt_get_buf(ctx);
+		err = rst_get_object(CPT_OBJ_UBC, start, v, ctx);
+		if (!err) {
+			obj = alloc_cpt_object(GFP_KERNEL, ctx);
+			if (obj) {
+				cpt_obj_setpos(obj, start, ctx);
+				intern_cpt_object(CPT_OBJ_UBC, obj, ctx);
+				/* this result used to be dropped silently */
+				err = restore_one_bc(v, obj, ctx);
+			} else {
+				err = -ENOMEM;
+			}
+			/* read the length before the buffer is handed back */
+			start += v->cpt_next;
+		}
+		cpt_release_buf(ctx);
+	}
+	return err;
+}
+
+/* Pass the beancounter of every CPT_OBJ_UBC object to put_beancounter(). */
+void rst_finish_ubc(struct cpt_context *ctx)
+{
+	cpt_object_t *ubc_obj;
+	for_each_object(ubc_obj, CPT_OBJ_UBC)
+		put_beancounter(ubc_obj->o_obj);
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpt/rst_undump.c linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_undump.c
--- linux-2.6.16.46-0.12.orig/kernel/cpt/rst_undump.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpt/rst_undump.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,875 @@
+/*
+ *
+ *  kernel/cpt/rst_undump.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/namespace.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/smp_lock.h>
+#include <linux/ve_proto.h>
+#include <linux/virtinfo.h>
+#include <linux/virtinfoscp.h>
+#include <linux/compat.h>
+#include <linux/vzcalluser.h>
+#include <ub/beancounter.h>
+#ifdef CONFIG_X86
+#include <asm/desc.h>
+#endif
+#include <asm/unistd.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
+#include "cpt_files.h"
+#include "cpt_mm.h"
+#include "cpt_process.h"
+#include "cpt_socket.h"
+#include "cpt_net.h"
+#include "cpt_ubc.h"
+#include "cpt_kernel.h"
+
+static int rst_utsname(cpt_context_t *ctx);
+
+
+struct thr_context {
+	struct completion init_complete;	/* completed by hook() on entry */
+	struct completion task_done;		/* completed by hook() at its out: label */
+	int error;				/* hook() result, stored before task_done */
+	struct cpt_context *ctx;		/* restore context read by hook() */
+	cpt_object_t	*tobj;			/* task object whose o_image hook() restores */
+};
+
+static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx);
+
+/* Load the CPT_SECT_VEINFO record into the current VE: ipc control values,
+ * start time minus the saved deltas, and ctx->last_vpid.  Returns 0 or error. */
+static int vps_rst_veinfo(struct cpt_context *ctx)
+{
+	struct cpt_veinfo_image *img;
+	struct ve_struct *ve;
+	struct timespec delta;
+	loff_t start, end;
+	int n, err;
+
+	err = rst_get_section(CPT_SECT_VEINFO, ctx, &start, &end);
+	if (err)
+		return err;
+
+	img = cpt_get_buf(ctx);
+	memset(img, 0, sizeof(*img));
+	err = rst_get_object(CPT_OBJ_VEINFO, start, img, ctx);
+	if (err) {
+		cpt_release_buf(ctx);
+		return err;
+	}
+
+	ve = get_exec_env();
+	/* shm and msg control values */
+	ve->_shm_ctlmni = img->shm_ctl_mni;
+	ve->_shm_ctlmax = img->shm_ctl_max;
+	ve->_shm_ctlall = img->shm_ctl_all;
+	ve->_msg_ctlmnb = img->msg_ctl_mnb;
+	ve->_msg_ctlmni = img->msg_ctl_mni;
+	ve->_msg_ctlmax = img->msg_ctl_max;
+
+	/* the four sem control values; both arrays must have the same size */
+	BUILD_BUG_ON(sizeof(ve->_sem_ctls) != sizeof(img->sem_ctl_arr));
+	for (n = 0; n < 4; n++)
+		ve->_sem_ctls[n] = img->sem_ctl_arr[n];
+
+	/* subtract the deltas stored in the image from the VE start time */
+	cpt_timespec_import(&delta, img->start_timespec_delta);
+	_set_normalized_timespec(&ve->start_timespec,
+			ve->start_timespec.tv_sec - delta.tv_sec,
+			ve->start_timespec.tv_nsec - delta.tv_nsec);
+	ve->start_jiffies -= img->start_jiffies_delta;
+	// // FIXME: what???
+	// // ve->start_cycles -= (s64)img->start_jiffies_delta * cycles_per_jiffy;
+
+	ctx->last_vpid = img->last_pid;
+	cpt_release_buf(ctx);
+	return 0;
+}
+
+/* Compute the time shift since the dump, create VE ctx->ve_id through
+ * real_env_create() and set jiffies_fixup.  Returns 0 or the create error. */
+static int vps_rst_reparent_root(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	int err;
+	struct env_create_param3 param;
+
+	do_posix_clock_monotonic_gettime(&ctx->cpt_monotonic_time);
+	do_gettimespec(&ctx->delta_time);
+	_set_normalized_timespec(&ctx->delta_time,
+				 ctx->delta_time.tv_sec - ctx->start_time.tv_sec,
+				 ctx->delta_time.tv_nsec - ctx->start_time.tv_nsec);
+	ctx->delta_nsec = (s64)ctx->delta_time.tv_sec*NSEC_PER_SEC + ctx->delta_time.tv_nsec;
+	if (ctx->delta_nsec < 0) {
+		wprintk_ctx("Wall time is behind source by %Ld ns, "
+			    "time sensitive applications can misbehave\n", (long long)-ctx->delta_nsec);
+	}
+
+	_set_normalized_timespec(&ctx->cpt_monotonic_time,
+				 ctx->cpt_monotonic_time.tv_sec - ctx->delta_time.tv_sec,
+				 ctx->cpt_monotonic_time.tv_nsec - ctx->delta_time.tv_nsec);
+
+	memset(&param, 0, sizeof(param));
+	param.iptables_mask = ctx->iptables_mask;
+	param.feature_mask = ctx->features;
+	/* feature_mask is set as required - pretend we know everything */
+	param.known_features = (ctx->image_version < CPT_VERSION_18) ?
+		VE_FEATURES_OLD : ~(__u64)0;
+
+	err = real_env_create(ctx->ve_id, VE_CREATE|VE_LOCK, 2, &param, sizeof(param));
+	if (err < 0) {	/* creation failed: do not apply the fixup below */
+		eprintk_ctx("real_env_create: %d\n", err);
+		return err;
+	}
+
+	get_exec_env()->jiffies_fixup =
+		(ctx->delta_time.tv_sec < 0 ?
+		 0 : timespec_to_jiffies(&ctx->delta_time)) -
+		(unsigned long)(get_jiffies_64() - ctx->virt_jiffies64);
+	printk("JFixup %ld %Ld\n", get_exec_env()->jiffies_fixup, (long long)ctx->delta_nsec);
+	return 0;
+}
+
+static int hook(void *arg)
+{
+	struct thr_context *thr_ctx = arg;
+	struct cpt_context *ctx;
+	cpt_object_t *tobj;
+	struct cpt_task_image *ti;
+	int err = 0;
+	int exiting = 0;
+	/* Body of a restored task's thread; arg is the creator's struct thr_context. */
+	current->state = TASK_UNINTERRUPTIBLE;
+	complete(&thr_ctx->init_complete);
+	schedule();	/* creators wake_up_process() us after rst_basic_init_task() */
+
+	ctx = thr_ctx->ctx;
+	tobj = thr_ctx->tobj;
+	ti = tobj->o_image;
+
+	current->fs->umask = 0;
+	/* the task with pid 1 also restores the VE-wide state */
+	if (ti->cpt_pid == 1) {
+		err = vps_rst_reparent_root(tobj, ctx);
+
+		if (err) {
+			rst_report_error(err, ctx);
+			goto out;
+		}
+
+		memcpy(&cap_bset, &ti->cpt_ecap, sizeof(kernel_cap_t));
+
+		if (ctx->statusfile) {
+			fput(ctx->statusfile);
+			ctx->statusfile = NULL;
+		}
+		/* read one byte from the lock file; the result is not checked */
+		if (ctx->lockfile) {
+			mm_segment_t oldfs;
+			ssize_t err = -EINVAL;	/* shadows the function-level err */
+			char b;
+
+			oldfs = get_fs(); set_fs(KERNEL_DS);
+			if (ctx->lockfile->f_op && ctx->lockfile->f_op->read)
+				err = ctx->lockfile->f_op->read(ctx->lockfile, &b, 1, &ctx->lockfile->f_pos);
+			set_fs(oldfs);
+			fput(ctx->lockfile);
+			ctx->lockfile = NULL;
+		}
+
+		err = vps_rst_veinfo(ctx);
+		if (err) {
+			eprintk_ctx("rst_veinfo: %d\n", err);
+			goto out;
+		}
+
+		err = rst_utsname(ctx);
+		if (err) {
+			eprintk_ctx("rst_utsname: %d\n", err);
+			goto out;
+		}
+
+		err = rst_root_namespace(ctx);
+		if (err) {
+			eprintk_ctx("rst_namespace: %d\n", err);
+			goto out;
+		}
+
+		if ((err = rst_restore_net(ctx)) != 0) {
+			eprintk_ctx("rst_restore_net: %d\n", err);
+			goto out;
+		}
+
+		err = rst_sockets(ctx);
+		if (err) {
+			eprintk_ctx("rst_sockets: %d\n", err);
+			goto out;
+		}
+		err = rst_sysv_ipc(ctx);
+		if (err) {
+			eprintk_ctx("rst_sysv_ipc: %d\n", err);
+			goto out;
+		}
+	}
+	/* switch to the saved user; an alloc_uid() failure is only logged */
+	do {
+		if (current->user->uid != ti->cpt_user) {
+			struct user_struct *u = alloc_uid(ti->cpt_user);
+			if (!u) {
+				eprintk_ctx("alloc_user\n");
+			} else {
+				switch_uid(u);
+			}
+		}
+	} while (0);
+
+	if ((err = rst_mm_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_mm: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_files_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_files: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_fs_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_fs: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_semundo_complete(ti, ctx)) != 0) {
+		eprintk_ctx("rst_semundo: %d\n", err);
+		goto out;
+	}
+
+	if ((err = rst_signal_complete(ti, &exiting, ctx)) != 0) {
+		eprintk_ctx("rst_signal: %d\n", err);
+		goto out;
+	}
+
+	if (ti->cpt_personality != 0)
+		__set_personality(ti->cpt_personality);
+
+	current->set_child_tid = NULL;
+	current->clear_child_tid = NULL;
+	current->flags &= ~(PF_FORKNOEXEC|PF_SUPERPRIV);
+	current->flags |= ti->cpt_flags&(PF_FORKNOEXEC|PF_SUPERPRIV);
+	current->exit_code = ti->cpt_exit_code;
+	current->pdeath_signal = ti->cpt_pdeath_signal;
+	/* only the two nanosleep restart blocks are restored; others are reported */
+	if (ti->cpt_restart.fn != CPT_RBL_0) {
+		if (ti->cpt_restart.fn != CPT_RBL_NANOSLEEP
+		    && ti->cpt_restart.fn != CPT_RBL_COMPAT_NANOSLEEP
+		    ) {
+			eprintk_ctx("unknown restart block\n");
+		} else {
+			DEFINE_KTIME(e);
+
+			current->thread_info->restart_block.fn = nanosleep_restart;
+#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
+			if (!ti->cpt_64bit)
+				current->thread_info->restart_block.fn = compat_nanosleep_restart;
+#endif
+			if (ctx->image_version >= CPT_VERSION_9)
+				e = ktime_add_ns(e, ti->cpt_restart.arg0);
+			else
+				e = ktime_add_ns(e, ti->cpt_restart.arg0*TICK_NSEC);
+			if (e.tv64 < 0)
+				e.tv64 = TICK_NSEC;
+			e = ktime_add(e, timespec_to_ktime(ctx->cpt_monotonic_time));
+			current->thread_info->restart_block.arg0 = e.tv64 & 0xFFFFFFFF;	/* low 32 bits of e */
+			current->thread_info->restart_block.arg1 = e.tv64 >> 32;	/* high bits of e */
+			if (ctx->image_version >= CPT_VERSION_9) {
+				current->thread_info->restart_block.arg2 = ti->cpt_restart.arg2;
+				current->thread_info->restart_block.arg3 = ti->cpt_restart.arg3;
+			} else {
+				current->thread_info->restart_block.arg2 = ti->cpt_restart.arg1;
+				current->thread_info->restart_block.arg3 = CLOCK_MONOTONIC;
+			}
+		}
+	}
+	/* interval timer fields of ->signal are set by the thread group leader only */
+	if (thread_group_leader(current)) {
+		current->signal->it_real_incr.tv64 = 0;
+		if (ctx->image_version >= CPT_VERSION_9) {
+			current->signal->it_real_incr =
+			ktime_add_ns(current->signal->it_real_incr, ti->cpt_it_real_incr);
+		} else {
+			current->signal->it_real_incr =
+			ktime_add_ns(current->signal->it_real_incr, ti->cpt_it_real_incr*TICK_NSEC);
+		}
+		current->signal->it_prof_incr = ti->cpt_it_prof_incr;
+		current->signal->it_virt_incr = ti->cpt_it_virt_incr;
+		current->signal->it_prof_expires = ti->cpt_it_prof_value;
+		current->signal->it_virt_expires = ti->cpt_it_virt_value;
+	}
+
+	err = rst_clone_children(tobj, ctx);
+	if (err) {
+		eprintk_ctx("rst_clone_children\n");
+		goto out;
+	}
+
+	if (exiting)
+		current->signal->flags |= SIGNAL_GROUP_EXIT;
+	/* pid 1 runs the VE-wide completion steps once rst_clone_children() returned */
+	if (ti->cpt_pid == 1) {
+		if ((err = rst_process_linkage(ctx)) != 0) {
+			eprintk_ctx("rst_process_linkage: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_do_filejobs(ctx)) != 0) {
+			eprintk_ctx("rst_do_filejobs: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_eventpoll(ctx)) != 0) {
+			eprintk_ctx("rst_eventpoll: %d\n", err);
+			goto out;
+		}
+#ifdef CONFIG_INOTIFY_USER
+		if ((err = rst_inotify(ctx)) != 0) {
+			eprintk_ctx("rst_inotify: %d\n", err);
+			goto out;
+		}
+#endif
+		if ((err = rst_sockets_complete(ctx)) != 0) {
+			eprintk_ctx("rst_sockets_complete: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_stray_files(ctx)) != 0) {
+			eprintk_ctx("rst_stray_files: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_posix_locks(ctx)) != 0) {
+			eprintk_ctx("rst_posix_locks: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_tty_jobcontrol(ctx)) != 0) {
+			eprintk_ctx("rst_tty_jobcontrol: %d\n", err);
+			goto out;
+		}
+		if ((err = rst_restore_fs(ctx)) != 0) {
+			eprintk_ctx("rst_restore_fs: %d\n", err);
+			goto out;
+		}
+		if (virtinfo_notifier_call(VITYPE_SCP,
+				VIRTINFO_SCP_RESTORE, ctx) & NOTIFY_FAIL) {
+			err = -ECHRNG;
+			eprintk_ctx("scp_restore failed\n");
+			goto out;
+		}
+		/*
+		 * if (ctx->last_vpid)
+		 *	get_exec_env()->last_vpid = ctx->last_vpid;
+		 */
+	}
+
+out:
+	thr_ctx->error = err;
+	complete(&thr_ctx->task_done);	/* the creator reads thr_ctx->error after this */
+	/* saved state was zombie/dead: exit right away with the saved exit code */
+	if (!err && (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
+		module_put(current->thread_info->exec_domain->module);
+		if (current->binfmt)
+			module_put(current->binfmt->module);
+		current->flags |= PF_EXIT_RESTART;
+		do_exit(ti->cpt_exit_code);
+	} else {
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+	}
+
+	schedule();
+	/* NOTE(review): presumably reached only if the thread is woken without a new image - confirm */
+	dprintk_ctx("leaked through %d/%d %p\n", current->pid, virt_pid(current), current->mm);
+
+	module_put(THIS_MODULE);
+	complete_and_exit(NULL, 0);
+	return 0;
+}
+
+#if 0	/* disabled: both call sites are compiled out with #if 0 as well */
+static void set_task_ubs(struct cpt_task_image *ti, struct cpt_context *ctx)
+{
+	struct task_beancounter *tbc;
+
+	tbc = task_bc(current);
+	/* replace fork_sub, and exec_ub when cpt_mm_ub is set, via rst_lookup_ubc() */
+	put_beancounter(tbc->fork_sub);
+	tbc->fork_sub = rst_lookup_ubc(ti->cpt_task_ub, ctx);
+	if (ti->cpt_mm_ub != CPT_NULL) {
+		put_beancounter(tbc->exec_ub);
+		tbc->exec_ub = rst_lookup_ubc(ti->cpt_mm_ub, ctx);
+	}
+}
+#endif
+
+/* Start hook() in a new kernel thread for task object obj: fill thr_ctx, spawn
+ * the thread, get_task_struct() its task and bind it to obj.
+ * Returns 0, the negative spawn result, or -ESRCH if the task is not found. */
+static int create_root_task(cpt_object_t *obj, struct cpt_context *ctx,
+		struct thr_context *thr_ctx)
+{
+	task_t *root_tsk;
+	int thread_pid;
+
+	init_completion(&thr_ctx->init_complete);
+	init_completion(&thr_ctx->task_done);
+	thr_ctx->error = 0;
+	thr_ctx->ctx = ctx;
+
+	thread_pid = local_kernel_thread(hook, thr_ctx, 0, 0);
+	if (thread_pid < 0)
+		return thread_pid;
+	read_lock(&tasklist_lock);
+	root_tsk = find_task_by_pid_ve(thread_pid);
+	if (root_tsk != NULL)
+		get_task_struct(root_tsk);
+	read_unlock(&tasklist_lock);
+	if (!root_tsk)
+		return -ESRCH;
+	cpt_obj_setobj(obj, root_tsk, ctx);
+	thr_ctx->tobj = obj;
+	return 0;
+}
+
+/* Copy the saved comm into the task and run rst_mm_basic(); always returns 0. */
+static int rst_basic_init_task(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct cpt_task_image *image = obj->o_image;
+	task_t *task = obj->o_obj;
+	memcpy(task->comm, image->cpt_comm, sizeof(task->comm));
+	rst_mm_basic(obj, image, ctx);
+	return 0;
+}
+
+static int make_baby(cpt_object_t *cobj,
+		     struct cpt_task_image *pi,
+		     struct cpt_context *ctx)
+{
+	unsigned long flags;
+	struct cpt_task_image *ci = cobj->o_image;
+	struct thr_context thr_ctx;
+	task_t *tsk;
+	pid_t pid;
+	struct fs_struct *tfs = NULL;
+	/* Spawn the thread for task object cobj (parent image pi) and run hook() in it. */
+	flags = rst_mm_flag(ci, ctx) | rst_files_flag(ci, ctx)
+		| rst_signal_flag(ci, ctx) | rst_semundo_flag(ci, ctx);
+	if (ci->cpt_rppid != pi->cpt_pid) {	/* pi is not the recorded parent of ci */
+		flags |= CLONE_THREAD|CLONE_PARENT;
+		if (ci->cpt_signal != pi->cpt_signal ||
+		    !(flags&CLONE_SIGHAND) ||
+		    (!(flags&CLONE_VM) && pi->cpt_mm != CPT_NULL)) {
+			eprintk_ctx("something is wrong with threads: %d %d %d %Ld %Ld %08lx\n",
+			       (int)ci->cpt_pid, (int)ci->cpt_rppid, (int)pi->cpt_pid,
+			       (long long)ci->cpt_signal, (long long)pi->cpt_signal, flags
+			       );
+			return -EINVAL;
+		}
+	}
+	/* thr_ctx is a local; hook() gets its address and completes task_done on it */
+	thr_ctx.ctx = ctx;
+	thr_ctx.error = 0;
+	init_completion(&thr_ctx.init_complete);
+	init_completion(&thr_ctx.task_done);
+	thr_ctx.tobj = cobj;
+
+#if 0
+	set_task_ubs(ci, ctx);
+#endif
+	/* without an fs of our own, use get_exec_env()->init_entry->fs for the clone */
+	if (current->fs == NULL) {
+		tfs = get_exec_env()->init_entry->fs;
+		if (tfs == NULL)
+			return -EINVAL;
+		atomic_inc(&tfs->count);
+		current->fs = tfs;
+	}
+	pid = local_kernel_thread(hook, &thr_ctx, flags, ci->cpt_pid);
+	if (tfs) {	/* undo the temporary fs assignment made above */
+		current->fs = NULL;
+		atomic_dec(&tfs->count);
+	}
+	if (pid < 0)
+		return pid;
+
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_ve(pid);
+	if (tsk)
+		get_task_struct(tsk);
+	read_unlock(&tasklist_lock);
+	if (tsk == NULL)
+		return -ESRCH;
+	cpt_obj_setobj(cobj, tsk, ctx);
+	thr_ctx.tobj = cobj;
+	wait_for_completion(&thr_ctx.init_complete);	/* hook() has been entered */
+	wait_task_inactive(cobj->o_obj);
+	rst_basic_init_task(cobj, ctx);
+
+	/* clone() increases group_stop_count if it was not zero and
+	 * CLONE_THREAD was asked. Undo.
+	 */
+	if (current->signal->group_stop_count && (flags & CLONE_THREAD)) {
+		if (tsk->signal != current->signal) BUG();
+		current->signal->group_stop_count--;
+	}
+
+	wake_up_process(tsk);
+	wait_for_completion(&thr_ctx.task_done);	/* hook() stored its result */
+	wait_task_inactive(tsk);
+
+	return thr_ctx.error;
+}
+
+/* Run make_baby() on each task image naming obj's pid as parent or leader. */
+static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx)
+{
+	struct cpt_task_image *ti = obj->o_image;
+	cpt_object_t *cobj;
+	int err;
+
+	for_each_object(cobj, CPT_OBJ_TASK) {
+		struct cpt_task_image *ci = cobj->o_image;
+		if (cobj == obj)
+			continue;
+		if (ci->cpt_tgid == ci->cpt_pid ? ci->cpt_rppid != ti->cpt_pid :
+		    (ci->cpt_leader != ti->cpt_pid || ci->cpt_pid == 1))
+			continue;	/* matches neither of the two cases */
+		err = make_baby(cobj, ti, ctx);
+		if (err) {
+			eprintk_ctx("make_baby: %d\n", err);
+			return err;
+		}
+	}
+	return 0;
+}
+
+/* Load each task image of CPT_SECT_TASKS into its own CPT_OBJ_TASK object. */
+static int read_task_images(struct cpt_context *ctx)
+{
+	int err;
+	loff_t start, end;
+
+	err = rst_get_section(CPT_SECT_TASKS, ctx, &start, &end);
+
+	while (!err && start < end) {
+		cpt_object_t *obj;
+		struct cpt_task_image *ti = cpt_get_buf(ctx);
+		__u64 len = 0;
+
+		err = rst_get_object(CPT_OBJ_TASK, start, ti, ctx);
+		if (err)
+			goto out_rel;
+		err = -EINVAL;
+		if (ti->cpt_pid != 1 && !__is_virtual_pid(ti->cpt_pid)) {
+			eprintk_ctx("BUG: pid %d is not virtual\n", ti->cpt_pid);
+			goto out_rel;
+		}
+		len = ti->cpt_next;
+		if (len < sizeof(*ti))	/* the memcpy below copies a whole *ti */
+			goto out_rel;
+		err = -ENOMEM;
+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
+		if (obj == NULL)
+			goto out_rel;
+		cpt_obj_setpos(obj, start, ctx);
+		intern_cpt_object(CPT_OBJ_TASK, obj, ctx);
+		obj->o_image = kmalloc(len, GFP_KERNEL);
+		if (obj->o_image == NULL)
+			goto out_rel;
+		memcpy(obj->o_image, ti, sizeof(*ti));
+		err = ctx->pread(obj->o_image + sizeof(*ti), len - sizeof(*ti), ctx, start + sizeof(*ti));
+out_rel:
+		cpt_release_buf(ctx);	/* ti must not be read past this point */
+		start += len;
+	}
+	return err;
+}
+
+
+/* Read the task and UBC sections, then start the first CPT_OBJ_TASK object
+ * through create_root_task() and wait until its hook() thread has stored a
+ * result.  Returns 0 or the first error. */
+static int vps_rst_restore_tree(struct cpt_context *ctx)
+{
+	struct thr_context thr_ctx_root;
+	cpt_object_t *obj;
+	int err;
+
+	if ((err = read_task_images(ctx)) != 0)
+		return err;
+	if ((err = rst_undump_ubc(ctx)) != 0)
+		return err;
+
+	if (virtinfo_notifier_call(VITYPE_SCP, VIRTINFO_SCP_RSTCHECK, ctx)
+			& NOTIFY_FAIL)
+		return -ECHRNG;
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	if ((err = rst_setup_pagein(ctx)) != 0)
+		return err;
+#endif
+
+	/* only the first task object is handled: the loop body always breaks */
+	for_each_object(obj, CPT_OBJ_TASK) {
+		err = create_root_task(obj, ctx, &thr_ctx_root);
+		if (err)
+			break;
+
+		/* hook() was entered: do the basic setup, then wake the thread */
+		wait_for_completion(&thr_ctx_root.init_complete);
+		wait_task_inactive(obj->o_obj);
+		rst_basic_init_task(obj, ctx);
+		wake_up_process(obj->o_obj);
+
+		wait_for_completion(&thr_ctx_root.task_done);
+		wait_task_inactive(obj->o_obj);
+		err = thr_ctx_root.error;
+		break;
+	}
+
+	return err;
+}
+
+
+/* Restore from the dump file: open it, reject 64-bit images this kernel
+ * cannot take, then run the tree and process restore with umask 0.
+ * On failure VIRTINFO_SCP_RSTFAIL is sent.  Returns 0 or an error code. */
+int vps_rst_undump(struct cpt_context *ctx)
+{
+	unsigned long saved_umask;
+	int err = rst_open_dumpfile(ctx);
+
+	if (err)
+		return err;
+
+	if (ctx->tasks64) {
+#if defined(CONFIG_IA64)
+		int arch_ok = (ctx->image_arch == CPT_OS_ARCH_IA64);
+#elif defined(CONFIG_X86_64)
+		int arch_ok = (ctx->image_arch == CPT_OS_ARCH_EMT64);
+#else
+		int arch_ok = 0;
+#endif
+		if (!arch_ok) {
+			eprintk_ctx("Cannot restore 64 bit VE on this architecture\n");
+			return -EINVAL;
+		}
+	}
+
+	saved_umask = current->fs->umask;
+	current->fs->umask = 0;
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	err = rst_setup_pagein(ctx);
+#endif
+
+	if (!err)
+		err = vps_rst_restore_tree(ctx);
+
+	if (!err)
+		err = rst_restore_process(ctx);
+
+	if (err)	/* notify the SCP chain that the restore failed */
+		virtinfo_notifier_call(VITYPE_SCP, VIRTINFO_SCP_RSTFAIL, ctx);
+
+	current->fs->umask = saved_umask;
+	return err;
+}
+
+/* Clear is_locked of VE ctx->ve_id under its op_sem; -ESRCH if no such VE. */
+static int rst_unlock_ve(struct cpt_context *ctx)
+{
+	struct ve_struct *ve = get_ve_by_id(ctx->ve_id);
+
+	if (ve == NULL)
+		return -ESRCH;
+	down_write(&ve->op_sem);
+	ve->is_locked = 0;
+	up_write(&ve->op_sem);
+	put_ve(ve);
+	return 0;
+}
+
+int rst_resume(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err = 0;
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *bc;
+#endif
+	/* Finish a restore: drop the FILE object references and start the tasks. */
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+
+		fput(file);
+	}
+	/* copy four limits kept in ctx->saved_ubc to the beancounter found by ve_id */
+#ifdef CONFIG_USER_RESOURCE
+	bc = get_beancounter_byuid(ctx->ve_id, 0);
+	BUG_ON(!bc);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_KMEMSIZE);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_NUMPROC);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_NUMFILE);
+	copy_one_ubparm(ctx->saved_ubc, bc->ub_parms, UB_DCACHESIZE);
+	put_beancounter(bc);
+#endif
+
+	rst_resume_network(ctx);
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+		struct cpt_task_image *ti = obj->o_image;
+
+		if (!tsk)
+			continue;
+		/* woken only if the saved state is TASK_UNINTERRUPTIBLE; stopped/traced is re-set */
+		if (ti->cpt_state == TASK_UNINTERRUPTIBLE) {
+			dprintk_ctx("task %d/%d(%s) is started\n", virt_pid(tsk), tsk->pid, tsk->comm);
+
+			/* Weird... If a signal is sent to stopped task,
+			 * nobody makes recalc_sigpending(). We have to do
+			 * this by hands after wake_up_process().
+			 * if we did this before a signal could arrive before
+			 * wake_up_process() and stall.
+			 */
+			spin_lock_irq(&tsk->sighand->siglock);
+			if (!signal_pending(tsk))
+				recalc_sigpending_tsk(tsk);
+			spin_unlock_irq(&tsk->sighand->siglock);
+
+			wake_up_process(tsk);
+		} else {
+			if (ti->cpt_state == TASK_STOPPED ||
+			    ti->cpt_state == TASK_TRACED) {
+				set_task_state(tsk, ti->cpt_state);
+			}
+		}
+		put_task_struct(tsk);	/* pairs with get_task_struct() at task creation */
+	}
+
+	rst_unlock_ve(ctx);
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	rst_complete_pagein(ctx, 0);
+#endif
+
+	rst_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+
+        return err;
+}
+
+int rst_kill(struct cpt_context *ctx)
+{
+	cpt_object_t *obj;
+	int err = 0;
+	/* Abort a restore: drop the FILE object references and SIGKILL the tasks. */
+	for_each_object(obj, CPT_OBJ_FILE) {
+		struct file *file = obj->o_obj;
+
+		fput(file);
+	}
+
+	for_each_object(obj, CPT_OBJ_TASK) {
+		task_t *tsk = obj->o_obj;
+
+		if (tsk == NULL)
+			continue;
+
+		if (tsk->exit_state == 0) {
+			send_sig(SIGKILL, tsk, 1);
+			/* block all signals but SIGKILL and clear the freezer flags */
+			spin_lock_irq(&tsk->sighand->siglock);
+			sigfillset(&tsk->blocked);
+			sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+			set_tsk_thread_flag(tsk, TIF_SIGPENDING);
+			clear_tsk_thread_flag(tsk, TIF_FREEZE);
+			if (tsk->flags & PF_FROZEN)
+				tsk->flags &= ~PF_FROZEN;
+			spin_unlock_irq(&tsk->sighand->siglock);
+
+			wake_up_process(tsk);
+		}
+
+		put_task_struct(tsk);	/* pairs with get_task_struct() at task creation */
+	}
+
+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
+	rst_complete_pagein(ctx, 1);
+#endif
+
+	rst_finish_ubc(ctx);
+	cpt_object_destroy(ctx);
+
+        return err;
+}
+
+/* Restore ve_utsname nodename and domainname from the CPT_SECT_UTSNAME names. */
+static int rst_utsname(cpt_context_t *ctx)
+{
+	int err;
+	loff_t sec = ctx->sections[CPT_SECT_UTSNAME];
+	loff_t endsec;
+	struct cpt_section_hdr h;
+	struct cpt_object_hdr o;
+	int i = 0;
+
+	if (sec == CPT_NULL)
+		return 0;
+	err = ctx->pread(&h, sizeof(h), ctx, sec);
+	if (err)
+		return err;
+	if (h.cpt_section != CPT_SECT_UTSNAME || h.cpt_hdrlen < sizeof(h))
+		return -EINVAL;
+
+	endsec = sec + h.cpt_next;
+	sec += h.cpt_hdrlen;
+	while (sec < endsec) {
+		int len;
+		char *ptr;
+		err = rst_get_object(CPT_OBJ_NAME, sec, &o, ctx);
+		if (err)
+			return err;
+		len = o.cpt_next - o.cpt_hdrlen;
+		if (len < 0)	/* would pass the bound below and reach pread() */
+			return -EINVAL;
+		if (len > __NEW_UTS_LEN+1)
+			return -ENAMETOOLONG;
+		switch (i) {
+		case 0:
+			ptr = ve_utsname.nodename; break;
+		case 1:
+			ptr = ve_utsname.domainname; break;
+		default:
+			return -EINVAL;
+		}
+		err = ctx->pread(ptr, len, ctx, sec+o.cpt_hdrlen);
+		if (err)
+			return err;
+		i++;
+		sec += o.cpt_next;
+	}
+	return 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpu.c linux-2.6.16.46-0.12-027test011/kernel/cpu.c
--- linux-2.6.16.46-0.12.orig/kernel/cpu.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/kernel/cpu.c	2007-08-28 17:35:34.000000000 +0400
@@ -21,6 +21,11 @@ static DECLARE_MUTEX(cpucontrol);
 static struct notifier_block *cpu_chain;
 
 #ifdef CONFIG_HOTPLUG_CPU
+
+#ifdef CONFIG_SCHED_VCPU
+#error "CONFIG_HOTPLUG_CPU isn't supported with CONFIG_SCHED_VCPU"
+#endif
+
 static struct task_struct *lock_cpu_hotplug_owner;
 static int lock_cpu_hotplug_depth;
 
@@ -95,8 +100,8 @@ static inline void check_for_tasks(int c
 	struct task_struct *p;
 
 	write_lock_irq(&tasklist_lock);
-	for_each_process(p) {
-		if (task_cpu(p) == cpu &&
+	for_each_process_all(p) {
+		if (task_pcpu(p) == cpu &&
 		    (!cputime_eq(p->utime, cputime_zero) ||
 		     !cputime_eq(p->stime, cputime_zero)))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
@@ -106,6 +111,13 @@ static inline void check_for_tasks(int c
 	write_unlock_irq(&tasklist_lock);
 }
 
+#ifdef CONFIG_SCHED_VCPU
+#error VCPU vs. HOTPLUG: fix hotplug code below
+/*
+ * What should be fixed:
+ * - check for if (idle_cpu()) yield()
+ */
+#endif
 /* Take this CPU down. */
 static int take_cpu_down(void *unused)
 {
diff -upr linux-2.6.16.46-0.12.orig/kernel/cpuset.c linux-2.6.16.46-0.12-027test011/kernel/cpuset.c
--- linux-2.6.16.46-0.12.orig/kernel/cpuset.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/cpuset.c	2007-08-28 17:35:31.000000000 +0400
@@ -971,7 +971,7 @@ static int update_nodemask(struct cpuset
 	n = 0;
 
 	/* Load up mmarray[] with mm reference for each task in cpuset. */
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		struct mm_struct *mm;
 
 		if (n >= ntasks) {
@@ -985,7 +985,7 @@ static int update_nodemask(struct cpuset
 		if (!mm)
 			continue;
 		mmarray[n++] = mm;
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	write_unlock_irq(&tasklist_lock);
 
 	/*
@@ -1198,7 +1198,7 @@ static int attach_task(struct cpuset *cs
 	if (pid) {
 		read_lock(&tasklist_lock);
 
-		tsk = find_task_by_pid(pid);
+		tsk = find_task_by_pid_all(pid);
 		if (!tsk || tsk->flags & PF_EXITING) {
 			read_unlock(&tasklist_lock);
 			return -ESRCH;
@@ -1650,13 +1650,13 @@ static int pid_array_load(pid_t *pidarra
 
 	read_lock(&tasklist_lock);
 
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		if (p->cpuset == cs) {
 			pidarray[n++] = p->pid;
 			if (unlikely(n == npids))
 				goto array_full;
 		}
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 array_full:
 	read_unlock(&tasklist_lock);
@@ -1742,12 +1742,7 @@ static ssize_t cpuset_tasks_read(struct 
 {
 	struct ctr_struct *ctr = file->private_data;
 
-	if (*ppos + nbytes > ctr->bufsz)
-		nbytes = ctr->bufsz - *ppos;
-	if (copy_to_user(buf, ctr->buf + *ppos, nbytes))
-		return -EFAULT;
-	*ppos += nbytes;
-	return nbytes;
+	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
 }
 
 static int cpuset_tasks_release(struct inode *unused_inode, struct file *file)
diff -upr linux-2.6.16.46-0.12.orig/kernel/exit.c linux-2.6.16.46-0.12-027test011/kernel/exit.c
--- linux-2.6.16.46-0.12.orig/kernel/exit.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/exit.c	2007-08-28 17:35:36.000000000 +0400
@@ -18,6 +18,7 @@
 #include <linux/personality.h>
 #include <linux/tty.h>
 #include <linux/namespace.h>
+#include <linux/virtinfo.h>
 #include <linux/key.h>
 #include <linux/security.h>
 #include <linux/cpu.h>
@@ -40,6 +41,8 @@
 #include <linux/pagg.h>
 #include <linux/audit.h> /* for audit_free() */
 
+#include <ub/ub_misc.h>
+
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/pgtable.h>
@@ -50,7 +53,7 @@ extern struct task_struct *child_reaper;
 
 int getrusage(struct task_struct *, int, struct rusage __user *);
 
-static void exit_mm(struct task_struct * tsk);
+void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p)
 {
@@ -65,18 +68,19 @@ static void __unhash_process(struct task
 	}
 
 	REMOVE_LINKS(p);
+	REMOVE_VE_LINKS(p);
 }
 
 void release_task(struct task_struct * p)
 {
 	int zap_leader;
 	task_t *leader;
-	struct dentry *proc_dentry;
+	struct dentry *proc_dentry[2];
 
 repeat: 
 	atomic_dec(&p->user->processes);
 	spin_lock(&p->proc_lock);
-	proc_dentry = proc_pid_unhash(p);
+	proc_pid_unhash(p, proc_dentry);
 	write_lock_irq(&tasklist_lock);
 	if (unlikely(p->ptrace))
 		__ptrace_unlink(p);
@@ -88,6 +92,8 @@ repeat: 
 	 * the process by __unhash_process.
 	 */
 	__unhash_process(p);
+	nr_zombie--;
+	atomic_inc(&nr_dead);
 
 	/*
 	 * If we are the last non-leader member of the thread
@@ -115,6 +121,8 @@ repeat: 
 	spin_unlock(&p->proc_lock);
 	proc_pid_flush(proc_dentry);
 	release_thread(p);
+	ub_task_uncharge(p);
+	pput_ve(p->ve_task_info.owner_env);
 	put_task_struct(p);
 
 	p = leader;
@@ -126,10 +134,10 @@ repeat: 
 
 void unhash_process(struct task_struct *p)
 {
-	struct dentry *proc_dentry;
+	struct dentry *proc_dentry[2];
 
 	spin_lock(&p->proc_lock);
-	proc_dentry = proc_pid_unhash(p);
+	proc_pid_unhash(p, proc_dentry);
 	write_lock_irq(&tasklist_lock);
 	__unhash_process(p);
 	write_unlock_irq(&tasklist_lock);
@@ -147,14 +155,16 @@ int session_of_pgrp(int pgrp)
 	struct task_struct *p;
 	int sid = -1;
 
+	WARN_ON(is_virtual_pid(pgrp));
+
 	read_lock(&tasklist_lock);
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p->signal->session > 0) {
 			sid = p->signal->session;
 			goto out;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
-	p = find_task_by_pid(pgrp);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
+	p = find_task_by_pid_ve(pgrp);
 	if (p)
 		sid = p->signal->session;
 out:
@@ -176,17 +186,19 @@ static int will_become_orphaned_pgrp(int
 	struct task_struct *p;
 	int ret = 1;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	WARN_ON(is_virtual_pid(pgrp));
+
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p == ignored_task
 				|| p->exit_state
-				|| p->real_parent->pid == 1)
+				|| virt_pid(p->real_parent) == 1)
 			continue;
 		if (process_group(p->real_parent) != pgrp
 			    && p->real_parent->signal->session == p->signal->session) {
 			ret = 0;
 			break;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return ret;	/* (sighing) "Often!" */
 }
 
@@ -194,6 +206,8 @@ int is_orphaned_pgrp(int pgrp)
 {
 	int retval;
 
+	WARN_ON(is_virtual_pid(pgrp));
+
 	read_lock(&tasklist_lock);
 	retval = will_become_orphaned_pgrp(pgrp, NULL);
 	read_unlock(&tasklist_lock);
@@ -206,7 +220,7 @@ static int has_stopped_jobs(int pgrp)
 	int retval = 0;
 	struct task_struct *p;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		if (p->state != TASK_STOPPED)
 			continue;
 
@@ -222,7 +236,7 @@ static int has_stopped_jobs(int pgrp)
 
 		retval = 1;
 		break;
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return retval;
 }
 
@@ -271,6 +285,9 @@ void __set_special_pids(pid_t session, p
 {
 	struct task_struct *curr = current->group_leader;
 
+	WARN_ON(__is_virtual_pid(pgrp));
+	WARN_ON(__is_virtual_pid(session));
+
 	if (curr->signal->session != session) {
 		detach_pid(curr, PIDTYPE_SID);
 		curr->signal->session = session;
@@ -289,6 +306,7 @@ void set_special_pids(pid_t session, pid
 	__set_special_pids(session, pgrp);
 	write_unlock_irq(&tasklist_lock);
 }
+EXPORT_SYMBOL(set_special_pids);
 
 /*
  * Let kernel threads use this to say that they
@@ -508,13 +526,17 @@ EXPORT_SYMBOL_GPL(exit_fs);
  * Turn us into a lazy TLB process if we
  * aren't already..
  */
-static void exit_mm(struct task_struct * tsk)
+void exit_mm(struct task_struct * tsk)
 {
 	struct mm_struct *mm = tsk->mm;
 
 	mm_release(tsk, mm);
 	if (!mm)
 		return;
+
+	if (test_tsk_thread_flag(tsk, TIF_MEMDIE))
+		mm->oom_killed = 1;
+
 	/*
 	 * Serialize with any possible pending coredump.
 	 * We must hold mmap_sem around checking core_waiters
@@ -543,6 +565,7 @@ static void exit_mm(struct task_struct *
 	task_unlock(tsk);
 	mmput(mm);
 }
+EXPORT_SYMBOL_GPL(exit_mm);
 
 static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
 {
@@ -621,13 +644,12 @@ static void reparent_thread(task_t *p, t
 static void forget_original_parent(struct task_struct * father,
 					  struct list_head *to_release)
 {
-	struct task_struct *p, *reaper = father;
+	struct task_struct *p, *tsk_reaper, *reaper = father;
 	struct list_head *_p, *_n;
 
 	do {
 		reaper = next_thread(reaper);
 		if (reaper == father) {
-			reaper = child_reaper;
 			break;
 		}
 	} while (reaper->exit_state);
@@ -649,9 +671,17 @@ static void forget_original_parent(struc
 		/* if father isn't the real parent, then ptrace must be enabled */
 		BUG_ON(father != p->real_parent && !ptrace);
 
+		tsk_reaper = reaper;
+		if (tsk_reaper == father)
+#ifdef CONFIG_VE
+			tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
+		if (tsk_reaper == p ||
+		    p->group_leader == VE_TASK_INFO(p)->owner_env->init_entry)
+#endif
+			tsk_reaper = child_reaper;
 		if (father == p->real_parent) {
-			/* reparent with a reaper, real father it's us */
-			choose_new_parent(p, reaper, child_reaper);
+			/* reparent with a tsk_reaper, real father it's us */
+			choose_new_parent(p, tsk_reaper, child_reaper);
 			reparent_thread(p, father, 0);
 		} else {
 			/* reparent ptraced task to its real parent */
@@ -672,7 +702,15 @@ static void forget_original_parent(struc
 	}
 	list_for_each_safe(_p, _n, &father->ptrace_children) {
 		p = list_entry(_p,struct task_struct,ptrace_list);
-		choose_new_parent(p, reaper, child_reaper);
+
+		tsk_reaper = reaper;
+		if (tsk_reaper == father)
+#ifdef CONFIG_VE
+			tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
+		if (tsk_reaper == p)
+#endif
+			tsk_reaper = child_reaper;
+		choose_new_parent(p, tsk_reaper, child_reaper);
 		reparent_thread(p, father, 1);
 	}
 }
@@ -768,6 +806,9 @@ static void exit_notify(struct task_stru
 	    && !capable(CAP_KILL))
 		tsk->exit_signal = SIGCHLD;
 
+	if (tsk->exit_signal != -1 && t == child_reaper)
+		/* We don't want people slaying init. */
+		tsk->exit_signal = SIGCHLD;
 
 	/* If something other than our normal parent is ptracing us, then
 	 * send it a SIGCHLD instead of honoring exit_signal.  exit_signal
@@ -786,6 +827,7 @@ static void exit_notify(struct task_stru
 	     unlikely(tsk->parent->signal->flags & SIGNAL_GROUP_EXIT)))
 		state = EXIT_DEAD;
 	tsk->exit_state = state;
+	nr_zombie++;
 
 	write_unlock_irq(&tasklist_lock);
 
@@ -800,6 +842,82 @@ static void exit_notify(struct task_stru
 		release_task(tsk);
 }
 
+#ifdef CONFIG_VE
+/*
+ * Handle the exit of a VE's init process (no-op for any other task).
+ */
+static void do_initproc_exit(void)
+{
+	struct task_struct *tsk;
+	struct ve_struct *env;
+	struct siginfo info;
+	struct task_struct *g, *p;
+	long delay = 1L;
+
+	tsk = current;
+	env = VE_TASK_INFO(current)->owner_env;
+	if (env->init_entry != tsk)
+		return;
+
+	if (ve_is_super(env) && tsk->pid == 1)
+		panic("Attempted to kill init!");
+
+	memset(&info, 0, sizeof(info));
+	info.si_errno = 0;
+	info.si_code = SI_KERNEL;
+	info.si_pid = virt_pid(tsk);
+	info.si_uid = current->uid;
+	info.si_signo = SIGKILL;
+
+	/*
+	 * Here the VE changes its state into "not running".
+	 * op_sem taken for write is a barrier to all VE manipulations from
+	 * ioctl: it waits for operations currently in progress and blocks all
+	 * subsequent operations until is_running is set to 0 and op_sem is
+	 * released.
+	 */
+	down_write(&env->op_sem);
+	env->is_running = 0;
+	up_write(&env->op_sem);
+
+	/* send SIGKILL to all processes of the VE */
+	read_lock(&tasklist_lock);
+	do_each_thread_ve(g, p) {
+		force_sig_info(SIGKILL, &info, p);
+	} while_each_thread_ve(g, p);
+	read_unlock(&tasklist_lock);
+
+	/* wait for all of init's children to exit */
+	while (atomic_read(&env->pcounter) > 1) {
+		if (sys_wait4(-1, NULL, __WALL | WNOHANG, NULL) > 0)
+			continue;
+		/* nothing reaped: -ECHILD, or no child has exited yet */
+		if (atomic_read(&env->pcounter) == 1)
+			break;
+
+		/* clear all signals to avoid wakeups */
+		if (signal_pending(tsk))
+			flush_signals(tsk);
+		/* tasks remain: sleep (backoff capped at HZ), then re-kill */
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(delay);
+		delay = (delay < HZ) ? (delay << 1) : HZ;
+		read_lock(&tasklist_lock);
+		do_each_thread_ve(g, p) {
+			if (p != tsk)
+				force_sig_info(SIGKILL, &info, p);
+		} while_each_thread_ve(g, p);
+		read_unlock(&tasklist_lock);
+	}
+	env->init_entry = child_reaper;
+	write_lock_irq(&tasklist_lock);
+	REMOVE_LINKS(tsk);
+	tsk->parent = tsk->real_parent = child_reaper;
+	SET_LINKS(tsk);
+	write_unlock_irq(&tasklist_lock);
+}
+#endif
+
 fastcall NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
@@ -815,14 +933,22 @@ fastcall NORET_TYPE void do_exit(long co
 		panic("Aiee, killing interrupt handler!");
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
+#ifdef CONFIG_VE
+	do_initproc_exit();
+#else
 	if (unlikely(tsk->pid == 1))
 		panic("Attempted to kill init!");
+#endif
 	if (tsk->io_context)
 		exit_io_context();
 
+	(void)virtinfo_gencall(VIRTINFO_DOEXIT, NULL);
+
 	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
 		current->ptrace_message = code;
+		set_pn_state(current, PN_STOP_EXIT);
 		ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
+		clear_pn_state(current);
 	}
 
 	/*
@@ -880,9 +1006,16 @@ fastcall NORET_TYPE void do_exit(long co
 		module_put(tsk->binfmt->module);
 
 	tsk->exit_code = code;
-	proc_exit_connector(tsk);
 	pagg_detach(tsk);
-	exit_notify(tsk);
+	if (!(tsk->flags & PF_EXIT_RESTART)) {
+		proc_exit_connector(tsk);
+		exit_notify(tsk);
+	} else {
+		write_lock_irq(&tasklist_lock);
+		tsk->exit_state = EXIT_ZOMBIE;
+		nr_zombie++;
+		write_unlock_irq(&tasklist_lock);
+	}
 #ifdef CONFIG_NUMA
 	mpol_free(tsk->mempolicy);
 	tsk->mempolicy = NULL;
@@ -922,7 +1055,14 @@ asmlinkage long sys_exit(int error_code)
 
 task_t fastcall *next_thread(const task_t *p)
 {
-	return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
+	task_t *tsk;
+
+	tsk = pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
+#ifdef CONFIG_VE
+	/* all threads should belong to ONE ve! */
+	BUG_ON(VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(p)->owner_env);
+#endif
+	return tsk;
 }
 
 EXPORT_SYMBOL(next_thread);
@@ -971,14 +1111,19 @@ asmlinkage void sys_exit_group(int error
 static int eligible_child(pid_t pid, int options, task_t *p)
 {
 	if (pid > 0) {
-		if (p->pid != pid)
+		if ((is_virtual_pid(pid) ? virt_pid(p) : p->pid) != pid)
 			return 0;
 	} else if (!pid) {
 		if (process_group(p) != process_group(current))
 			return 0;
 	} else if (pid != -1) {
-		if (process_group(p) != -pid)
-			return 0;
+		if (__is_virtual_pid(-pid)) {
+			if (virt_pgid(p) != -pid)
+				return 0;
+		} else {
+			if (process_group(p) != -pid)
+				return 0;
+		}
 	}
 
 	/*
@@ -1048,7 +1193,7 @@ static int wait_task_zombie(task_t *p, i
 	int status;
 
 	if (unlikely(noreap)) {
-		pid_t pid = p->pid;
+		pid_t pid = get_task_pid(p);
 		uid_t uid = p->uid;
 		int exit_code = p->exit_code;
 		int why, status;
@@ -1160,7 +1305,7 @@ static int wait_task_zombie(task_t *p, i
 			retval = put_user(status, &infop->si_status);
 	}
 	if (!retval && infop)
-		retval = put_user(p->pid, &infop->si_pid);
+		retval = put_user(get_task_pid(p), &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(p->uid, &infop->si_uid);
 	if (retval) {
@@ -1168,7 +1313,7 @@ static int wait_task_zombie(task_t *p, i
 		p->exit_state = EXIT_ZOMBIE;
 		return retval;
 	}
-	retval = p->pid;
+	retval = get_task_pid(p);
 	if (p->real_parent != p->parent) {
 		write_lock_irq(&tasklist_lock);
 		/* Double-check with lock held.  */
@@ -1228,7 +1373,7 @@ static int wait_task_stopped(task_t *p, 
 	read_unlock(&tasklist_lock);
 
 	if (unlikely(noreap)) {
-		pid_t pid = p->pid;
+		pid_t pid = get_task_pid(p);
 		uid_t uid = p->uid;
 		int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
 
@@ -1299,11 +1444,11 @@ bail_ref:
 	if (!retval && infop)
 		retval = put_user(exit_code, &infop->si_status);
 	if (!retval && infop)
-		retval = put_user(p->pid, &infop->si_pid);
+		retval = put_user(get_task_pid(p), &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(p->uid, &infop->si_uid);
 	if (!retval)
-		retval = p->pid;
+		retval = get_task_pid(p);
 	put_task_struct(p);
 
 	BUG_ON(!retval);
@@ -1340,7 +1485,7 @@ static int wait_task_continued(task_t *p
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
 	spin_unlock_irq(&p->sighand->siglock);
 
-	pid = p->pid;
+	pid = get_task_pid(p);
 	uid = p->uid;
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
@@ -1351,7 +1496,7 @@ static int wait_task_continued(task_t *p
 		if (!retval && stat_addr)
 			retval = put_user(0xffff, stat_addr);
 		if (!retval)
-			retval = p->pid;
+			retval = get_task_pid(p);
 	} else {
 		retval = wait_noreap_copyout(p, pid, uid,
 					     CLD_CONTINUED, SIGCONT,
@@ -1585,6 +1730,7 @@ asmlinkage long sys_wait4(pid_t pid, int
 	prevent_tail_call(ret);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(sys_wait4);
 
 #ifdef __ARCH_WANT_SYS_WAITPID
 
diff -upr linux-2.6.16.46-0.12.orig/kernel/fairsched.c linux-2.6.16.46-0.12-027test011/kernel/fairsched.c
--- linux-2.6.16.46-0.12.orig/kernel/fairsched.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/fairsched.c	2007-08-28 17:35:34.000000000 +0400
@@ -0,0 +1,1389 @@
+/*
+ * Fair Scheduler
+ *
+ * Copyright (C) 2000-2005  SWsoft
+ * All rights reserved.
+ *
+ * Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * Start-tag scheduling follows the theory presented in
+ * http://www.cs.utexas.edu/users/dmcl/papers/ps/SIGCOMM96.ps
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <asm/timex.h>
+#include <asm/atomic.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/sysctl.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/console.h>
+#include <linux/fairsched.h>
+#include <linux/vsched.h>
+
+#ifdef CONFIG_USER_RESOURCE
+#include <ub/ub_mem.h>
+#else
+#define ub_vmalloc vmalloc
+#endif
+
+/* we need it for vsched routines in sched.c */
+spinlock_t fairsched_lock = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_FAIRSCHED
+
+#define FAIRSHED_DEBUG		" debug"
+
+
+/*********************************************************************/
+/*
+ * Special arithmetics
+ */
+/*********************************************************************/
+
+#define CYCLES_SHIFT (8)
+#define SCYCLES_TIME(time) \
+        ((scycles_t) {((time) + (1 << CYCLES_SHIFT) - 1)  >> CYCLES_SHIFT})
+
+#define CYCLES_ZERO (0)
+static inline int CYCLES_BEFORE(cycles_t x, cycles_t y)
+{
+        return (__s64)(x-y) < 0;
+}
+static inline int CYCLES_AFTER(cycles_t x, cycles_t y)
+{
+        return (__s64)(y-x) < 0;
+}
+static inline void CYCLES_DADD(cycles_t *x, fschdur_t y) {*x+=y.d;}
+
+/*
+ * fairsched_schedule() may be called less often than once per timer
+ * tick due to main scheduler optimizations, so an abstract timeslice is
+ * introduced. It can be an arbitrary number of cycles, but the main
+ * scheduler must not exceed it: it has to call the fairsched scheduler
+ * before this timeslice expires on a node.
+ */
+static cycles_t cycles_per_timeslice;
+#define FSCHDUR_ZERO (0)
+#define TICK_DUR ((fschdur_t){cycles_per_timeslice})
+static inline fschdur_t FSCHDURATION(cycles_t x, cycles_t y)
+{
+	return (fschdur_t){x - y};
+}
+static inline int FSCHDUR_CMP(fschdur_t x, fschdur_t y)
+{
+	if (x.d < y.d) return -1;
+	if (x.d > y.d) return 1;
+	return 0;
+}
+static inline fschdur_t FSCHDUR_SUB(fschdur_t x, fschdur_t y)
+{
+	return (fschdur_t){x.d - y.d};
+}
+
+#define FSCHTAG_ZERO ((fschtag_t){0})
+static inline int FSCHTAG_CMP(fschtag_t x, fschtag_t y)
+{
+	if (x.t < y.t) return -1;
+	if (x.t > y.t) return 1;
+	return 0;
+}
+static inline fschtag_t FSCHTAG_MAX(fschtag_t x, fschtag_t y)
+{
+	return x.t >= y.t ? x : y;
+}
+static inline int FSCHTAG_DADD(fschtag_t *tag, fschdur_t dur, unsigned w)
+{
+	cycles_t new_tag;
+	new_tag = tag->t + (cycles_t)dur.d * w;
+	if (new_tag < tag->t)
+		return -1;
+	/* DEBUG */
+	if (new_tag >= (1ULL << 48))
+		return -1;
+	tag->t = new_tag;
+	return 0;
+}
+static inline int FSCHTAG_ADD(fschtag_t *tag, fschtag_t y)
+{
+	cycles_t new_tag;
+	new_tag = tag->t + y.t;
+	if (new_tag < tag->t)
+		return -1;
+	tag->t = new_tag;
+	return 0;
+}
+static inline fschtag_t FSCHTAG_SUB(fschtag_t x, fschtag_t y)
+{
+	return (fschtag_t){x.t - y.t};
+}
+
+#define FSCHVALUE_FMT "%Lu"
+#define FSCHVALUE_PRINT(x) ((x).v)
+#define FSCHVALUE_ZERO ((fschvalue_t){0})
+#define TICK_VALUE ((fschvalue_t)	\
+	{(cycles_t)cycles_per_timeslice << FSCHRATE_SHIFT})
+static inline fschvalue_t FSCHVALUE(unsigned long t)
+{
+	return (fschvalue_t){(cycles_t)t << FSCHRATE_SHIFT};
+}
+static inline int FSCHVALUE_CMP(fschvalue_t x, fschvalue_t y)
+{
+	if (x.v < y.v) return -1;
+	if (x.v > y.v) return 1;
+	return 0;
+}
+static inline void FSCHVALUE_DADD(fschvalue_t *val, fschdur_t dur,
+		unsigned rate)
+{
+	val->v += (cycles_t)dur.d * rate;
+}
+static inline fschvalue_t FSCHVALUE_SUB(fschvalue_t x, fschvalue_t y)
+{
+	return (fschvalue_t){x.v - y.v};
+}
+static inline cycles_t FSCHVALUE_TO_DELAY(fschvalue_t val, unsigned rate)
+{
+	unsigned long t;
+	/*
+	 * Here we lose precision to make the division 32-bit on IA-32.
+	 * The value is not greater than TICK_VALUE.
+	 * (TICK_VALUE >> FSCHRATE_SHIFT) fits unsigned long.
+	 */
+	t = (val.v + (1 << FSCHRATE_SHIFT) - 1) >> FSCHRATE_SHIFT;
+	return (cycles_t)((t + rate - 1) / rate) << FSCHRATE_SHIFT;
+}
+
+
+/*********************************************************************/
+/*
+ * Global data
+ */
+/*********************************************************************/
+
+/*
+ * Assertions.
+ * Called with preemption disabled.
+ */
+
+#define fsch_assert(x)							\
+	do {								\
+		static int count;					\
+		if (x)							\
+			break;						\
+		if (count++ > 10)					\
+			break;						\
+		__printk_no_wake++;					\
+		printk("fsch_assert " #x " failed\n");			\
+		__printk_no_wake--;					\
+	} while (0)
+
+#define fsch_validate(x, fmt...)					\
+	do {								\
+		static int count;					\
+		if (x)							\
+			break;						\
+		if (count++ > 10)					\
+			break;						\
+		__printk_no_wake++;					\
+		printk("fsch_assert " #x " failed\n");			\
+		printk("fsch_assert: " fmt);				\
+		__printk_no_wake--;					\
+	} while (0)
+
+/*
+ * Configurable parameters
+ */
+unsigned fairsched_max_latency = 25; /* jiffies */
+
+/*
+ * Parameters initialized at startup
+ */
+/* Number of online CPUs */
+unsigned fairsched_nr_cpus;
+/* Token Bucket depth (burst size) */
+static fschvalue_t max_value;
+
+struct fairsched_node fairsched_init_node = {
+	.id		= INT_MAX,
+#ifdef CONFIG_VE
+	.owner_env	= get_ve0(),
+#endif
+	.weight		= 1,
+};
+EXPORT_SYMBOL(fairsched_init_node);
+
+struct fairsched_node fairsched_idle_node = {
+	.id =			-1,
+};
+
+static int fairsched_nr_nodes;
+static LIST_HEAD(fairsched_node_head);
+static LIST_HEAD(fairsched_running_head);
+static LIST_HEAD(fairsched_delayed_head);
+
+DEFINE_PER_CPU(cycles_t, prev_schedule);
+static fschtag_t max_latency;
+
+static DEFINE_MUTEX(fairsched_mutex);
+
+/*********************************************************************/
+/*
+ * Small helper routines
+ */
+/*********************************************************************/
+
+/* this did not prove to be a very valuable statistic... */
+#define fairsched_inc_ve_strv(node, cycles)  do {} while(0)
+#define fairsched_dec_ve_strv(node, cycles)  do {} while(0)
+
+/*********************************************************************/
+/*
+ * Runlist management
+ */
+/*********************************************************************/
+
+/*
+ * Returns the start_tag of the first runnable node, or 0.
+ */
+static inline fschtag_t virtual_time(void)
+{
+	struct fairsched_node *p;
+
+	if (!list_empty(&fairsched_running_head)) {
+		p = list_first_entry(&fairsched_running_head,
+				struct fairsched_node, runlist);
+		return p->start_tag;
+	}
+	return FSCHTAG_ZERO;
+}
+
+static void fairsched_recompute_max_latency(void)
+{
+	struct fairsched_node *p;
+	unsigned w;
+	fschtag_t tag;
+
+	w = FSCHWEIGHT_MAX;
+	for_each_fairsched_node(p) {
+		if (p->weight < w)
+			w = p->weight;
+	}
+	tag = FSCHTAG_ZERO;
+	(void) FSCHTAG_DADD(&tag, TICK_DUR,
+				fairsched_nr_cpus * fairsched_max_latency * w);
+	max_latency = tag;
+}
+
+static void fairsched_reset_start_tags(void)
+{
+	struct fairsched_node *cnode;
+	fschtag_t min_tag;
+
+	min_tag = virtual_time();
+	for_each_fairsched_node(cnode) {
+		if (FSCHTAG_CMP(cnode->start_tag, min_tag) > 0)
+			cnode->start_tag = FSCHTAG_SUB(cnode->start_tag,
+						       min_tag);
+		else
+			cnode->start_tag = FSCHTAG_ZERO;
+	}
+}
+
+static void fairsched_running_insert(struct fairsched_node *node)
+{
+	struct list_head *tmp;
+	struct fairsched_node *p;
+	fschtag_t start_tag_max;
+
+	if (!list_empty(&fairsched_running_head)) {
+		start_tag_max = virtual_time();
+		if (!FSCHTAG_ADD(&start_tag_max, max_latency) &&
+		    FSCHTAG_CMP(start_tag_max, node->start_tag) < 0)
+			node->start_tag = start_tag_max;
+	}
+
+	list_for_each(tmp, &fairsched_running_head) {
+		p = list_entry(tmp, struct fairsched_node, runlist);
+		if (FSCHTAG_CMP(node->start_tag, p->start_tag) <= 0)
+			break;
+	}
+	/* insert node just before tmp */
+	list_add_tail(&node->runlist, tmp);
+}
+
+static inline void fairsched_running_insert_fromsleep(
+		struct fairsched_node *node)
+{
+	node->start_tag = FSCHTAG_MAX(node->start_tag, virtual_time());
+	fairsched_running_insert(node);
+}
+
+
+/*********************************************************************/
+/*
+ * CPU limiting helper functions
+ *
+ * These functions compute rates and delays, manipulate the sleep
+ * lists, and so on.
+ */
+/*********************************************************************/
+
+/*
+ * Insert a node into the list of nodes removed from scheduling,
+ * sorted by the time at which the node is allowed to run,
+ * historically called `delay'.
+ */
+static void fairsched_delayed_insert(struct fairsched_node *node)
+{
+	struct fairsched_node *p;
+	struct list_head *tmp;
+
+	list_for_each(tmp, &fairsched_delayed_head) {
+		p = list_entry(tmp, struct fairsched_node,
+				   runlist);
+		if (CYCLES_AFTER(p->delay, node->delay))
+			break;
+	}
+	/* insert node just before tmp (at the tail if none is later) */
+	list_add_tail(&node->runlist, tmp);
+}
+
+static inline void nodevalue_add(struct fairsched_node *node,
+		fschdur_t duration, unsigned rate)
+{
+	FSCHVALUE_DADD(&node->value, duration, rate);
+	if (FSCHVALUE_CMP(node->value, max_value) > 0)
+		node->value = max_value;
+}
+
+/*
+ * The node has been selected to run.
+ * This function accounts in advance for the time that the node will run.
+ * The advance not used by the node will be credited back.
+ */
+static void fairsched_ratelimit_charge_advance(
+		struct fairsched_node *node,
+		cycles_t time)
+{
+	fsch_assert(!node->delayed);
+	fsch_validate(FSCHVALUE_CMP(node->value, TICK_VALUE) >= 0,
+			"charge, value " FSCHVALUE_FMT
+			", tick " FSCHVALUE_FMT
+			", delay %Lu, time %Lu"
+			", lastupd %Lu, rate %u\n",
+			FSCHVALUE_PRINT(node->value),
+			FSCHVALUE_PRINT(TICK_VALUE),
+			node->delay, time,
+			node->last_updated_at, node->rate);
+
+	/*
+	 * Account for the time passed since last update.
+	 * It might be needed if the node has become runnable because of
+	 * a wakeup, but hasn't gone through other functions updating
+	 * the bucket value.
+	 */
+	if (CYCLES_AFTER(time, node->last_updated_at)) {
+		nodevalue_add(node, FSCHDURATION(time, node->last_updated_at),
+			      node->rate);
+		node->last_updated_at = time;
+	}
+
+	/* charge for the full tick the node might be running */
+	node->value = FSCHVALUE_SUB(node->value, TICK_VALUE);
+	if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) {
+		list_del(&node->runlist);
+		node->delayed = 1;
+		node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY(
+					FSCHVALUE_SUB(TICK_VALUE, node->value),
+					node->rate);
+		node->nr_ready = 0;
+		fairsched_delayed_insert(node);
+	}
+}
+
+static void fairsched_ratelimit_credit_unused(
+		struct fairsched_node *node,
+		cycles_t time, fschdur_t duration)
+{
+	/* account for the time passed since the last update */
+	if (CYCLES_AFTER(time, node->last_updated_at)) {
+		nodevalue_add(node, FSCHDURATION(time, node->last_updated_at),
+			      node->rate);
+		node->last_updated_at = time;
+	}
+
+	/*
+	 * When the node was given this CPU, it was charged for 1 tick.
+	 * Credit back the unused time.
+	 */
+	if (FSCHDUR_CMP(duration, TICK_DUR) < 0)
+		nodevalue_add(node, FSCHDUR_SUB(TICK_DUR, duration),
+			      1 << FSCHRATE_SHIFT);
+
+	/* check if the node is allowed to run */
+	if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) {
+		/*
+		 * The node was delayed and remains so.
+		 * But since the bucket value has been updated,
+		 * recompute the delay time; the caller re-sorts the node.
+		 */
+		fsch_assert(node->delayed);
+		node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY(
+					FSCHVALUE_SUB(TICK_VALUE, node->value),
+					node->rate);
+	} else if (node->delayed) {
+		/*
+		 * The node was delayed, but now it is allowed to run.
+		 * The lists are not touched here; moving the node is
+		 * left to the caller.
+		 */
+		node->nr_ready = node->nr_runnable;
+		node->delayed = 0;
+	}
+}
+
+static void fairsched_delayed_wake(cycles_t time)
+{
+	struct fairsched_node *p;
+
+	while (!list_empty(&fairsched_delayed_head)) {
+		p = list_entry(fairsched_delayed_head.next,
+				  struct fairsched_node,
+				  runlist);
+		if (CYCLES_AFTER(p->delay, time))
+			break;
+
+		/* ok, the delay period is completed */
+		/* account for the time passed since last update */
+		if (CYCLES_AFTER(time, p->last_updated_at)) {
+			nodevalue_add(p, FSCHDURATION(time, p->last_updated_at),
+					p->rate);
+			p->last_updated_at = time;
+		}
+
+		fsch_validate(FSCHVALUE_CMP(p->value, TICK_VALUE) >= 0,
+				"wake, value " FSCHVALUE_FMT
+				", tick " FSCHVALUE_FMT
+				", delay %Lu, time %Lu"
+				", lastupd %Lu, rate %u\n",
+				FSCHVALUE_PRINT(p->value),
+				FSCHVALUE_PRINT(TICK_VALUE),
+				p->delay, time,
+				p->last_updated_at, p->rate);
+		p->nr_ready = p->nr_runnable;
+		p->delayed = 0;
+		list_del_init(&p->runlist);
+		if (p->nr_ready)
+			fairsched_running_insert_fromsleep(p);
+	}
+}
+
+static struct fairsched_node *fairsched_find(unsigned int id);
+
+void fairsched_cpu_online_map(int id, cpumask_t *mask)
+{
+	struct fairsched_node *node;
+
+	mutex_lock(&fairsched_mutex);
+	node = fairsched_find(id);
+	if (node == NULL)
+		*mask = CPU_MASK_NONE;
+	else
+		vsched_cpu_online_map(node->vsched, mask);
+	mutex_unlock(&fairsched_mutex);
+}
+
+/*********************************************************************/
+/*
+ * The heart of the algorithm:
+ * fairsched_incrun, fairsched_decrun, fairsched_schedule
+ *
+ * Note: old property nr_ready >= nr_pcpu doesn't hold anymore.
+ * However, nr_runnable, nr_ready and delayed are maintained in sync.
+ */
+/*********************************************************************/
+
+/*
+ * Called on a wakeup inside the node.
+ */
+void fairsched_incrun(struct fairsched_node *node)
+{
+	if (!node->delayed && !node->nr_ready++)
+		/* the node wasn't on the running list, insert */
+		fairsched_running_insert_fromsleep(node);
+	node->nr_runnable++;
+}
+
+/*
+ * Called from inside schedule() when a sleeping state is entered.
+ */
+void fairsched_decrun(struct fairsched_node *node)
+{
+	if (!node->delayed && !--node->nr_ready)
+		/* nr_ready changed 1->0, remove from the running list */
+		list_del_init(&node->runlist);
+	--node->nr_runnable;
+}
+
+void fairsched_inccpu(struct fairsched_node *node)
+{
+	node->nr_pcpu++;
+	fairsched_dec_ve_strv(node, cycles);
+}
+
+static inline void __fairsched_deccpu(struct fairsched_node *node)
+{
+	node->nr_pcpu--;
+	fairsched_inc_ve_strv(node, cycles);
+}
+
+void fairsched_deccpu(struct fairsched_node *node)
+{
+	if (node == &fairsched_idle_node)
+		return;
+
+	__fairsched_deccpu(node);
+}
+
+static void fairsched_account(struct fairsched_node *node,
+		cycles_t time)
+{
+	fschdur_t duration;
+
+	duration = FSCHDURATION(time, __get_cpu_var(prev_schedule));
+#ifdef CONFIG_VE
+	CYCLES_DADD(&node->owner_env->cpu_used_ve, duration);
+#endif
+
+	/*
+	 * The duration is not greater than TICK_DUR since
+	 * task->need_resched is always 1.
+	 */
+	if (FSCHTAG_DADD(&node->start_tag, duration, node->weight)) {
+		fairsched_reset_start_tags();
+		(void) FSCHTAG_DADD(&node->start_tag, duration,
+					node->weight);
+	}
+
+	list_del_init(&node->runlist);
+	if (node->rate_limited)
+		fairsched_ratelimit_credit_unused(node, time, duration);
+	if (!node->delayed) {
+		if (node->nr_ready)
+			fairsched_running_insert(node);
+	} else
+		fairsched_delayed_insert(node);
+}
+
+/*
+ * Scheduling decision
+ *
+ * Updates CPU usage for the node releasing the CPU and selects a new node.
+ */
+struct fairsched_node *fairsched_schedule(
+		struct fairsched_node *prev_node,
+		struct fairsched_node *cur_node,
+		int cur_node_active,
+		cycles_t time)
+{
+	struct fairsched_node *p;
+
+	if (prev_node != &fairsched_idle_node)
+		fairsched_account(prev_node, time);
+	__get_cpu_var(prev_schedule) = time;
+
+	fairsched_delayed_wake(time);
+
+	list_for_each_entry(p, &fairsched_running_head, runlist) {
+		if (p->nr_pcpu < p->nr_ready ||
+		    (cur_node_active && p == cur_node)) {
+			if (p->rate_limited)
+				fairsched_ratelimit_charge_advance(p, time);
+			return p;
+		}
+	}
+	return NULL;
+}
+
+
+/*********************************************************************/
+/*
+ * System calls
+ *
+ * All do_xxx functions are called with fairsched_mutex held and after
+ * the capability check.
+ *
+ * The binary interfaces follow some other Fair Scheduler implementations
+ * (although some system call arguments are not needed for our implementation).
+ */
+/*********************************************************************/
+
+static struct fairsched_node *fairsched_find(unsigned int id)
+{
+	struct fairsched_node *node;
+
+	/* Linear walk over every registered node; first id match wins. */
+	for_each_fairsched_node(node)
+		if (node->id == id)
+			return node;
+	return NULL;
+}
+
+/*
+ * Create node @newid with the given @weight and link it into the node list.
+ * @parent is accepted for binary compatibility only and is not used.
+ * Returns @newid, or -EINVAL / -EBUSY / -ENOMEM on failure.
+ */
+static int do_fairsched_mknod(unsigned int parent, unsigned int weight,
+		unsigned int newid)
+{
+	struct fairsched_node *node;
+
+	if (weight < 1 || weight > FSCHWEIGHT_MAX)
+		return -EINVAL;
+	/* newid is unsigned: only the upper bound can be violated. */
+	if (newid > INT_MAX)
+		return -EINVAL;
+
+	if (fairsched_find(newid) != NULL)
+		return -EBUSY;
+
+	/* kzalloc() zero-fills the node, so vcpus and the rest start at 0. */
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (node == NULL)
+		return -ENOMEM;
+
+	node->weight = weight;
+	node->id = newid;
+	INIT_LIST_HEAD(&node->runlist);
+#ifdef CONFIG_VE
+	node->owner_env = get_exec_env();
+#endif
+
+	spin_lock_irq(&fairsched_lock);
+	list_add(&node->nodelist, &fairsched_node_head);
+	fairsched_nr_nodes++;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+
+	/* The id fits in an int thanks to the INT_MAX check above. */
+	return newid;
+}
+
+asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
+				    unsigned int newid)
+{
+	int err = -EPERM;
+
+	/* Only CAP_SETVEID holders may create nodes. */
+	if (capable(CAP_SETVEID)) {
+		/* do_xxx helpers must run under fairsched_mutex. */
+		mutex_lock(&fairsched_mutex);
+		err = do_fairsched_mknod(parent, weight, newid);
+		mutex_unlock(&fairsched_mutex);
+	}
+	return err;
+}
+EXPORT_SYMBOL(sys_fairsched_mknod);
+
+/*
+ * Destroy node @id: tear down its vsched, unlink it and free it.
+ * Returns 0, -EINVAL for an unknown id or the init node, or the error
+ * reported by vsched_destroy().
+ */
+static int do_fairsched_rmnod(unsigned int id)
+{
+	struct fairsched_node *victim = fairsched_find(id);
+	int err;
+
+	/* Unknown ids and the init node are both rejected. */
+	if (victim == NULL || victim == &fairsched_init_node)
+		return -EINVAL;
+
+	err = vsched_destroy(victim->vsched);
+	if (err)
+		return err;
+
+	spin_lock_irq(&fairsched_lock);
+	list_del(&victim->runlist); /* required for delayed nodes */
+	list_del(&victim->nodelist);
+	fairsched_nr_nodes--;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+
+	kfree(victim);
+	return 0;
+}
+
+asmlinkage int sys_fairsched_rmnod(unsigned int id)
+{
+	int err = -EPERM;
+
+	/* Only CAP_SETVEID holders may remove nodes. */
+	if (capable(CAP_SETVEID)) {
+		/* do_xxx helpers must run under fairsched_mutex. */
+		mutex_lock(&fairsched_mutex);
+		err = do_fairsched_rmnod(id);
+		mutex_unlock(&fairsched_mutex);
+	}
+	return err;
+}
+EXPORT_SYMBOL(sys_fairsched_rmnod);
+
+/* Change the weight of node @id; node 0 cannot be re-weighted. */
+int do_fairsched_chwt(unsigned int id, unsigned weight)
+{
+	struct fairsched_node *target;
+
+	if (id == 0 || weight < 1 || weight > FSCHWEIGHT_MAX)
+		return -EINVAL;
+
+	target = fairsched_find(id);
+	if (!target)
+		return -ENOENT;
+
+	/* Store the weight and recompute max latency under one lock hold. */
+	spin_lock_irq(&fairsched_lock);
+	target->weight = weight;
+	fairsched_recompute_max_latency();
+	spin_unlock_irq(&fairsched_lock);
+
+	return 0;
+}
+
+asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned weight)
+{
+	int err = -EPERM;
+
+	/* Only CAP_SETVEID holders may change weights. */
+	if (capable(CAP_SETVEID)) {
+		/* do_xxx helpers must run under fairsched_mutex. */
+		mutex_lock(&fairsched_mutex);
+		err = do_fairsched_chwt(id, weight);
+		mutex_unlock(&fairsched_mutex);
+	}
+	return err;
+}
+
+/*
+ * Set the number of virtual CPUs of node @id.  Out-of-range requests
+ * (0 or more than the online CPUs) are replaced by num_online_cpus().
+ */
+int do_fairsched_vcpus(unsigned int id, unsigned int vcpus)
+{
+	struct fairsched_node *node;
+
+	if (id == 0)
+		return -EINVAL;
+	node = fairsched_find(id);
+	if (!node)
+		return -ENOENT;
+
+	if (vcpus == 0 || vcpus > num_online_cpus())
+		vcpus = num_online_cpus();
+	node->vcpus = vcpus;
+	/* Without a vsched there is nothing more to reconfigure. */
+	if (!node->vsched)
+		return 0;
+	/* FIXME: adjust rate ... */
+	return vsched_set_vcpus(node->vsched, vcpus);
+}
+
+asmlinkage int sys_fairsched_vcpus(unsigned int id, unsigned int vcpus)
+{
+	int err = -EPERM;
+
+	/* Only CAP_SETVEID holders may change the vcpu count. */
+	if (capable(CAP_SETVEID)) {
+		/* do_xxx helpers must run under fairsched_mutex. */
+		mutex_lock(&fairsched_mutex);
+		err = do_fairsched_vcpus(id, vcpus);
+		mutex_unlock(&fairsched_mutex);
+	}
+	return err;
+}
+EXPORT_SYMBOL(sys_fairsched_vcpus);
+
+/* Re-queue a delayed node at @time; caller holds fairsched_lock. */
+static void fairsched_rate_requeue(struct fairsched_node *node, cycles_t time)
+{
+	if (!node->delayed)
+		return;
+	list_del(&node->runlist);
+	node->delay = time;
+	fairsched_delayed_insert(node);
+	node->last_updated_at = time;
+	fairsched_delayed_wake(time);
+}
+
+/*
+ * Set, drop or query the CPU rate limit of node @id according to @op.
+ * Returns the effective rate for SET/GET, 0 for DROP, -ENODATA when GET
+ * finds no limit, -ENOENT for an unknown node and -EINVAL otherwise.
+ */
+int do_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	struct fairsched_node *node;
+	cycles_t now;
+	int retval = -EINVAL;	/* also the answer for an unknown @op */
+
+	if (id == 0)
+		return -EINVAL;
+	if (op == FAIRSCHED_SET_RATE && (rate < 1 || rate >= (1UL << 31)))
+		return -EINVAL;
+
+	node = fairsched_find(id);
+	if (node == NULL)
+		return -ENOENT;
+
+	spin_lock_irq(&fairsched_lock);
+	now = get_cycles();
+	if (op == FAIRSCHED_SET_RATE) {
+		/* The rate is capped at fairsched_nr_cpus << FSCHRATE_SHIFT. */
+		node->rate = rate;
+		if (node->rate > (fairsched_nr_cpus << FSCHRATE_SHIFT))
+			node->rate = fairsched_nr_cpus << FSCHRATE_SHIFT;
+		node->rate_limited = 1;
+		node->value = max_value;
+		fairsched_rate_requeue(node, now);
+		retval = node->rate;
+	} else if (op == FAIRSCHED_DROP_RATE) {
+		/*
+		 * Clearing rate is not needed by the kernel, which must not
+		 * rely on rate being 0 when unset; it is a band-aid for
+		 * some existing tools (don't know which exactly).  --SAW
+		 */
+		node->rate = 0;
+		node->rate_limited = 0;
+		node->value = max_value;
+		fairsched_rate_requeue(node, now);
+		retval = 0;
+	} else if (op == FAIRSCHED_GET_RATE) {
+		if (node->rate_limited)
+			retval = node->rate;
+		else
+			retval = -ENODATA;
+	}
+	spin_unlock_irq(&fairsched_lock);
+
+	return retval;
+}
+
+asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	int err = -EPERM;
+
+	/* Only CAP_SETVEID holders may touch rate limits. */
+	if (capable(CAP_SETVEID)) {
+		/* do_xxx helpers must run under fairsched_mutex. */
+		mutex_lock(&fairsched_mutex);
+		err = do_fairsched_rate(id, op, rate);
+		mutex_unlock(&fairsched_mutex);
+	}
+	return err;
+}
+
+/*
+ * Move task @p into @node; vsched_create() runs first if node->vsched
+ * is still NULL.  Called under fairsched_mutex.
+ */
+static int __do_fairsched_mvpr(struct task_struct *p,
+		struct fairsched_node *node)
+{
+	if (!node->vsched) {
+		int err = vsched_create(node->id, node);
+
+		if (err < 0)
+			return err;
+	}
+
+	/* no need to destroy vsched in case of mvpr failure */
+	return vsched_mvpr(p, node->vsched);
+}
+
+/*
+ * Move the task identified by @pid into node @nodeid.
+ * Returns -ENOENT for an unknown node, -ESRCH for an unknown pid, else
+ * the result of __do_fairsched_mvpr().
+ */
+int do_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	struct fairsched_node *node = fairsched_find(nodeid);
+	struct task_struct *tsk;
+	int err;
+
+	if (!node)
+		return -ENOENT;
+
+	/* Take a task reference while tasklist_lock is still held. */
+	read_lock(&tasklist_lock);
+	tsk = find_task_by_pid_all(pid);
+	if (tsk)
+		get_task_struct(tsk);
+	read_unlock(&tasklist_lock);
+	if (!tsk)
+		return -ESRCH;
+
+	/* The reference taken above is dropped right after the move. */
+	err = __do_fairsched_mvpr(tsk, node);
+	put_task_struct(tsk);
+	return err;
+}
+
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	int err = -EPERM;
+
+	/* Only CAP_SETVEID holders may move tasks between nodes. */
+	if (capable(CAP_SETVEID)) {
+		/* do_xxx helpers must run under fairsched_mutex. */
+		mutex_lock(&fairsched_mutex);
+		err = do_fairsched_mvpr(pid, nodeid);
+		mutex_unlock(&fairsched_mutex);
+	}
+	return err;
+}
+EXPORT_SYMBOL(sys_fairsched_mvpr);
+
+
+/*********************************************************************/
+/*
+ * proc interface
+ */
+/*********************************************************************/
+
+struct fairsched_node_dump {	/* per-node snapshot row for /proc output */
+#ifdef CONFIG_VE
+	envid_t veid;		/* veid of the node's owner_env */
+#endif
+	int id;			/* copy of node->id */
+	unsigned weight;
+	unsigned rate;
+	unsigned rate_limited : 1,
+		 delayed : 1;
+	fschtag_t start_tag;
+	fschvalue_t value;
+	cycles_t delay;
+	int nr_ready;
+	int nr_runnable;
+	int nr_pcpu;
+	int nr_tasks, nr_runtasks;	/* 0 unless compat dump with CONFIG_VE */
+};
+
+struct fairsched_dump {
+	int len, compat;	/* len: entries filled in nodes[] */
+	struct fairsched_node_dump nodes[0];	/* trailing variable array */
+};
+
+static struct fairsched_dump *fairsched_do_dump(int compat)
+{
+	int nr_nodes;		/* capacity of dump->nodes[] */
+	int len, i;		/* len: allocation size in bytes */
+	struct fairsched_dump *dump;
+	struct fairsched_node *node;
+	struct fairsched_node_dump *p;	/* next free slot in dump->nodes[] */
+	unsigned long flags;
+	/* Snapshot the nodes visible to the caller; NULL if out of memory. */
+start:
+	nr_nodes = (ve_is_super(get_exec_env()) ? fairsched_nr_nodes + 16 : 1);
+	len = sizeof(*dump) + nr_nodes * sizeof(dump->nodes[0]);
+	dump = ub_vmalloc(len);
+	if (dump == NULL)
+		goto out;
+	/* nr_nodes was sampled without the lock; re-check it under the lock. */
+	spin_lock_irqsave(&fairsched_lock, flags);
+	if (ve_is_super(get_exec_env()) && nr_nodes < fairsched_nr_nodes)
+		goto repeat;
+	p = dump->nodes;
+	list_for_each_entry_reverse(node, &fairsched_node_head, nodelist) {
+		if ((char *)p - (char *)dump >= len)
+			break;	/* buffer is full */
+		p->nr_tasks = 0;
+		p->nr_runtasks = 0;
+#ifdef CONFIG_VE
+		if (!ve_accessible(node->owner_env, get_exec_env()))
+			continue;	/* slot is reused for the next node */
+		p->veid = node->owner_env->veid;
+		if (compat) {
+			p->nr_tasks = atomic_read(&node->owner_env->pcounter);
+			for_each_online_cpu(i)
+				p->nr_runtasks +=
+					VE_CPU_STATS(node->owner_env, i)
+								->nr_running;
+			if (p->nr_runtasks < 0)
+				p->nr_runtasks = 0;
+		}
+#endif
+		p->id = node->id;
+		p->weight = node->weight;
+		p->rate = node->rate;
+		p->rate_limited = node->rate_limited;
+		p->delayed = node->delayed;
+		p->start_tag = node->start_tag;
+		p->value = node->value;
+		p->delay = node->delay;
+		p->nr_ready = node->nr_ready;
+		p->nr_runnable = node->nr_runnable;
+		p->nr_pcpu = node->nr_pcpu;
+		p++;	/* entry complete */
+	}
+	dump->len = p - dump->nodes;	/* number of entries filled */
+	dump->compat = compat;
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+
+out:
+	return dump;
+	/* Buffer turned out too small: free it and size it again. */
+repeat:
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+	vfree(dump);
+	goto start;
+}
+
+#define FAIRSCHED_PROC_HEADLINES 2	/* version line + column header */
+
+#if defined(CONFIG_VE)
+/*
+ * Legacy "fairsched" table; format is dictated by compatibility reasons.
+ */
+static int fairsched_seq_show(struct seq_file *m, void *v)
+{
+	struct fairsched_dump *dump;
+	struct fairsched_node_dump *p;
+	unsigned vid, nid, pid, r;
+	/* v carries the entry pointer plus a sub-row number in bits 0-1. */
+	dump = m->private;
+	p = (struct fairsched_node_dump *)((unsigned long)v & ~3UL);
+	if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
+		if (p == dump->nodes)
+			seq_printf(m, "Version: 2.6 debug\n");
+		else if (p == dump->nodes + 1)
+			seq_printf(m,
+				       "      veid "
+				       "        id "
+				       "    parent "
+				       "weight "
+				       " rate "
+  				       "tasks "
+				       "  run "
+				       "cpus"
+				       " "
+				       "flg "
+				       "ready "
+				       "           start_tag "
+				       "               value "
+				       "               delay"
+				       "\n");
+	} else {
+		p -= FAIRSCHED_PROC_HEADLINES;	/* undo the headline offset */
+		vid = nid = pid = 0;
+		r = (unsigned long)v & 3;	/* sub-row: 0, 1 or 2 */
+		if (p == dump->nodes) {
+			if (r == 2)
+				nid = p->id;
+		} else {
+			if (!r)
+				nid = p->id;
+			else if (r == 1)
+				vid = pid = p->id;
+			else
+				vid = p->id, nid = 1;
+		}
+		seq_printf(m,
+			       "%10u "
+			       "%10u %10u %6u %5u %5u %5u %4u"
+			       " "
+			       " %c%c %5u %20Lu %20Lu %20Lu"
+			       "\n",
+			       vid,
+			       nid,
+			       pid,
+			       p->weight,
+			       p->rate,
+			       p->nr_tasks,
+			       p->nr_runtasks,
+			       p->nr_pcpu,
+			       p->rate_limited ? 'L' : '.',
+			       p->delayed ? 'D' : '.',
+			       p->nr_ready,
+			       (unsigned long long)p->start_tag.t,
+			       (unsigned long long)p->value.v,
+			       (unsigned long long)p->delay
+			       );
+	}
+
+	return 0;
+}
+
+static void *fairsched_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct fairsched_dump *dump;
+	unsigned long l;
+	/* Rows: headlines, 2 for the first node, 3 for every other node. */
+	dump = m->private;
+	if (*pos >= dump->len * 3 - 1 + FAIRSCHED_PROC_HEADLINES)
+		return NULL;
+	if (*pos < FAIRSCHED_PROC_HEADLINES)
+		return dump->nodes + *pos;
+	/* with 2 headlines: cookie = &nodes[(pos + 5) / 3] | (pos + 5) % 3 */
+	l = (unsigned long)(dump->nodes +
+		((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) / 3);
+	l |= ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) % 3;
+	return (void *)l;
+}
+static void *fairsched_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	*pos += 1;	/* step, then let ->start decode the new position */
+	return fairsched_seq_start(m, pos);
+}
+#endif
+
+static int fairsched2_seq_show(struct seq_file *m, void *v)
+{
+	struct fairsched_dump *dump;
+	struct fairsched_node_dump *p;
+	/* v is dump->nodes + position; the first positions are headlines. */
+	dump = m->private;
+	p = v;
+	if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
+		if (p == dump->nodes)
+			seq_printf(m, "Version: 2.7" FAIRSHED_DEBUG "\n");
+		else if (p == dump->nodes + 1)
+			seq_printf(m,
+				       "        id "
+				       "weight "
+				       " rate "
+				       "  run "
+				       "cpus"
+#ifdef FAIRSHED_DEBUG
+				       " "
+				       "flg "
+				       "ready "
+				       "           start_tag "
+				       "               value "
+				       "               delay"
+#endif
+				       "\n");
+	} else {
+		p -= FAIRSCHED_PROC_HEADLINES;	/* undo the headline offset */
+		seq_printf(m,
+			       "%10u %6u %5u %5u %4u"
+#ifdef FAIRSHED_DEBUG
+			       " "
+			       " %c%c %5u %20Lu %20Lu %20Lu"
+#endif
+			       "\n",
+			       p->id,
+			       p->weight,
+			       p->rate,
+			       p->nr_runnable,
+			       p->nr_pcpu
+#ifdef FAIRSHED_DEBUG
+			       ,
+			       p->rate_limited ? 'L' : '.',
+			       p->delayed ? 'D' : '.',
+			       p->nr_ready,
+			       (unsigned long long)p->start_tag.t,
+			       (unsigned long long)p->value.v,
+			       (unsigned long long)p->delay
+#endif
+			       );
+	}
+
+	return 0;
+}
+
+static void *fairsched2_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct fairsched_dump *dump = m->private;
+
+	/* Rows: FAIRSCHED_PROC_HEADLINES header lines, then one per node. */
+	if (*pos < dump->len + FAIRSCHED_PROC_HEADLINES)
+		return dump->nodes + *pos;
+	return NULL;
+}
+static void *fairsched2_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	*pos += 1;	/* step, then let ->start validate the new position */
+	return fairsched2_seq_start(m, pos);
+}
+static void fairsched2_seq_stop(struct seq_file *m, void *v)
+{	/* intentionally empty: used as ->stop by both seq_operations */
+}
+
+#ifdef CONFIG_VE
+static struct seq_operations fairsched_seq_op = {
+	.start		= fairsched_seq_start,
+	.next		= fairsched_seq_next,
+	.stop		= fairsched2_seq_stop,	/* shared empty ->stop */
+	.show		= fairsched_seq_show	/* compat row format */
+};
+#endif
+static struct seq_operations fairsched2_seq_op = {
+	.start		= fairsched2_seq_start,
+	.next		= fairsched2_seq_next,
+	.stop		= fairsched2_seq_stop,	/* empty ->stop */
+	.show		= fairsched2_seq_show	/* one row per node */
+};
+static int fairsched_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int compat = 0;
+	int err;
+
+#ifdef CONFIG_VE
+	/* "fairsched" is the compat file; "fairsched2" has a longer name. */
+	compat = (file->f_dentry->d_name.len == sizeof("fairsched") - 1);
+	err = seq_open(file, compat ? &fairsched_seq_op : &fairsched2_seq_op);
+#else
+	err = seq_open(file, &fairsched2_seq_op);
+#endif
+	if (err)
+		return err;
+	/* Snapshot the nodes once; the seq iterators only walk the copy. */
+	seq = file->private_data;
+	seq->private = fairsched_do_dump(compat);
+	if (seq->private != NULL)
+		return 0;
+	seq_release(inode, file);
+	return -ENOMEM;
+}
+static int fairsched_seq_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	struct fairsched_dump *snapshot = seq->private;
+
+	/* Detach the snapshot taken at open time, then free it. */
+	seq->private = NULL;
+	vfree(snapshot);
+
+	seq_release(inode, file);
+	return 0;
+}
+static struct file_operations proc_fairsched_operations = {
+	.open		= fairsched_seq_open,	/* takes the node snapshot */
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= fairsched_seq_release	/* frees the node snapshot */
+};
+
+
+/*********************************************************************/
+/*
+ * Fairsched initialization
+ */
+/*********************************************************************/
+
+/* sysctl handler for the int in ctl->data; parsing is proc_dointvec(). */
+int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = ctl->data;
+	int old = *valp;
+	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	/* Only a write that actually changed the value needs a recompute. */
+	if (write && *valp != old) {
+		spin_lock_irq(&fairsched_lock);
+		fairsched_recompute_max_latency();
+		spin_unlock_irq(&fairsched_lock);
+	}
+
+	return ret;
+}
+
+static void fairsched_calibrate(void)
+{
+	fairsched_nr_cpus = num_online_cpus();
+	cycles_per_timeslice = msecs_to_jiffies(FSCH_TIMESLICE)
+							* cycles_per_jiffy;
+	max_value = FSCHVALUE(cycles_per_timeslice * (fairsched_nr_cpus + 1));
+}	/* derives fairsched_nr_cpus, cycles_per_timeslice and max_value */
+
+void __init fairsched_init_early(void)
+{
+	fairsched_init_node.vcpus = num_online_cpus();
+	list_add(&fairsched_init_node.nodelist, &fairsched_node_head);
+	fairsched_nr_nodes++;	/* the init node is counted like any other */
+}
+
+/*
+ * Note: this function is executed late in the initialization sequence.
+ * We ourselves need calibrated cycles and initialized procfs...
+ * The consequence of this late initialization is that start tags are
+ * effectively ignored and each node preempts others on insertion.
+ * But it isn't a problem (only init node can be runnable).
+ */
+void __init fairsched_init_late(void)
+{
+	struct proc_dir_entry *entry;
+	/* A zero cycle counter is treated as "no TSC" and is fatal. */
+	if (get_cycles() == 0)
+		panic("FAIRSCHED: no TSC!\n");
+	fairsched_calibrate();
+	fairsched_recompute_max_latency();
+	/* Both proc files share one fops; open() tells them apart by name. */
+	entry = create_proc_glob_entry("fairsched", S_IRUGO, NULL);
+	if (entry)
+		entry->proc_fops = &proc_fairsched_operations;
+	entry = create_proc_glob_entry("fairsched2", S_IRUGO, NULL);
+	if (entry)
+		entry->proc_fops = &proc_fairsched_operations;
+}
+
+
+#else /* CONFIG_FAIRSCHED */
+
+
+/*********************************************************************/
+/*
+ * No Fairsched: every fairsched system call returns -ENOSYS
+ */
+/*********************************************************************/
+
+asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight,
+				    unsigned int newid)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_rmnod(unsigned int id)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned int weight)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_fairsched_vcpus(unsigned int id, unsigned int vcpus)
+{
+	return -ENOSYS;
+}
+
+void __init fairsched_init_late(void)
+{
+}
+
+#endif /* CONFIG_FAIRSCHED */
diff -upr linux-2.6.16.46-0.12.orig/kernel/fork.c linux-2.6.16.46-0.12-027test011/kernel/fork.c
--- linux-2.6.16.46-0.12.orig/kernel/fork.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/fork.c	2007-08-28 17:35:36.000000000 +0400
@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/completion.h>
 #include <linux/namespace.h>
+#include <linux/file.h>
 #include <linux/personality.h>
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
@@ -27,6 +28,7 @@
 #include <linux/key.h>
 #include <linux/binfmts.h>
 #include <linux/mman.h>
+#include <linux/virtinfo.h>
 #include <linux/fs.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
@@ -36,6 +38,7 @@
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
 #include <linux/futex.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
 #include <linux/ptrace.h>
 #include <linux/mount.h>
@@ -56,11 +59,16 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+#include <ub/ub_misc.h>
+#include <ub/ub_oom.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
 unsigned long total_forks;	/* Handle normal Linux uptimes. */
 int nr_threads; 		/* The idle threads do not count.. */
+EXPORT_SYMBOL(nr_threads);
 
 int max_threads;		/* tunable limit on nr_threads */
 
@@ -120,11 +128,16 @@ void __put_task_struct_cb(struct rcu_hea
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	ub_task_put(tsk);
 	security_task_free(tsk);
 	free_uid(tsk->user);
 	put_group_info(tsk->group_info);
 	delayacct_tsk_free(tsk);
 
+#ifdef CONFIG_VE
+	put_ve(VE_TASK_INFO(tsk)->owner_env);
+	atomic_dec(&nr_dead);
+#endif
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
 }
@@ -139,7 +152,7 @@ void __init fork_init(unsigned long memp
 	/* create a slab on which task_structs can be allocated */
 	task_struct_cachep =
 		kmem_cache_create("task_struct", sizeof(struct task_struct),
-			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
+			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_UBC, NULL, NULL);
 #endif
 
 	/*
@@ -227,7 +240,12 @@ static inline int dup_mmap(struct mm_str
 								-pages);
 			continue;
 		}
+
 		charge = 0;
+		if (ub_memory_charge(mm, mpnt->vm_end - mpnt->vm_start,
+					mpnt->vm_flags & ~VM_LOCKED,
+					mpnt->vm_file, UB_HARD))
+			goto fail_noch;
 		if (mpnt->vm_flags & VM_ACCOUNT) {
 			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 			if (security_vm_enough_memory(len))
@@ -274,7 +292,7 @@ static inline int dup_mmap(struct mm_str
 		rb_parent = &tmp->vm_rb;
 
 		mm->map_count++;
-		retval = copy_page_range(mm, oldmm, mpnt);
+		retval = copy_page_range(mm, oldmm, tmp, mpnt);
 
 		if (tmp->vm_ops && tmp->vm_ops->open)
 			tmp->vm_ops->open(tmp);
@@ -294,6 +312,9 @@ out:
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
+	ub_memory_uncharge(mm, mpnt->vm_end - mpnt->vm_start,
+			mpnt->vm_flags & ~VM_LOCKED, mpnt->vm_file);
+fail_noch:
 	retval = -ENOMEM;
 	vm_unacct_memory(charge);
 	goto out;
@@ -319,12 +340,15 @@ static inline void mm_free_pgd(struct mm
 
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
 
+ EXPORT_SYMBOL(mmlist_lock);
+
 #define allocate_mm()	(kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
 #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
 
 #include <linux/init_task.h>
 
-static struct mm_struct * mm_init(struct mm_struct * mm)
+static struct mm_struct * mm_init(struct mm_struct * mm,
+		struct task_struct *tsk)
 {
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
@@ -339,11 +363,14 @@ static struct mm_struct * mm_init(struct
 	mm->ioctx_list = NULL;
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
+	set_mm_ub(mm, tsk);
 
 	if (likely(!mm_alloc_pgd(mm))) {
 		mm->def_flags = 0;
 		return mm;
 	}
+
+	put_mm_ub(mm);
 	free_mm(mm);
 	return NULL;
 }
@@ -358,10 +385,11 @@ struct mm_struct * mm_alloc(void)
 	mm = allocate_mm();
 	if (mm) {
 		memset(mm, 0, sizeof(*mm));
-		mm = mm_init(mm);
+		mm = mm_init(mm, NULL);
 	}
 	return mm;
 }
+EXPORT_SYMBOL_GPL(mm_alloc);
 
 /*
  * Called when the last reference to the mm
@@ -373,8 +401,10 @@ void fastcall __mmdrop(struct mm_struct 
 	BUG_ON(mm == &init_mm);
 	mm_free_pgd(mm);
 	destroy_context(mm);
+	put_mm_ub(mm);
 	free_mm(mm);
 }
+EXPORT_SYMBOL_GPL(__mmdrop);
 
 /*
  * Decrement the use count and release all resources for an mm.
@@ -389,7 +419,10 @@ void mmput(struct mm_struct *mm)
 			list_del(&mm->mmlist);
 			spin_unlock(&mmlist_lock);
 		}
+		if (mm->oom_killed)
+			ub_oom_task_dead(current);
 		put_swap_token(mm);
+		(void) virtinfo_gencall(VIRTINFO_EXITMMAP, mm);
 		mmdrop(mm);
 	}
 }
@@ -477,7 +510,7 @@ static struct mm_struct *dup_mm(struct t
 
 	memcpy(mm, oldmm, sizeof(*mm));
 
-	if (!mm_init(mm))
+	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
 	if (init_new_context(tsk, mm))
@@ -504,6 +537,7 @@ fail_nocontext:
 	 * because it calls destroy_context()
 	 */
 	mm_free_pgd(mm);
+	put_mm_ub(mm);
 	free_mm(mm);
 	return NULL;
 }
@@ -909,7 +943,7 @@ asmlinkage long sys_set_tid_address(int 
 {
 	current->clear_child_tid = tidptr;
 
-	return current->pid;
+	return virt_pid(current);
 }
 
 /*
@@ -926,7 +960,7 @@ static task_t *copy_process(unsigned lon
 				 unsigned long stack_size,
 				 int __user *parent_tidptr,
 				 int __user *child_tidptr,
-				 int pid)
+				 int pid, long pid0)
 {
 	int retval;
 	struct task_struct *p = NULL;
@@ -958,11 +992,14 @@ static task_t *copy_process(unsigned lon
 	if (!p)
 		goto fork_out;
 
+	if (ub_task_charge(current, p))
+		goto bad_fork_charge;
+
 	retval = -EAGAIN;
 	if (atomic_read(&p->user->processes) >=
 			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
-				p->user != &root_user)
+				p->user->uid != 0)
 			goto bad_fork_free;
 	}
 
@@ -988,12 +1025,20 @@ static task_t *copy_process(unsigned lon
 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
 	copy_flags(clone_flags, p);
 	p->pid = pid;
+#ifdef CONFIG_VE
+	set_virt_pid(p, alloc_vpid(p->pid, pid0 ? : -1));
+	if (virt_pid(p) < 0)
+		goto bad_fork_cleanup_module;
+#endif
 	retval = -EFAULT;
 	if (clone_flags & CLONE_PARENT_SETTID)
-		if (put_user(p->pid, parent_tidptr))
+		if (put_user(virt_pid(p), parent_tidptr))
 			goto bad_fork_cleanup_delays_binfmt;
 
 	p->proc_dentry = NULL;
+#ifdef CONFIG_VE
+	p->ve_task_info.glob_proc_dentry = NULL;
+#endif
 
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
@@ -1011,6 +1056,7 @@ static task_t *copy_process(unsigned lon
 	p->wchar = 0;		/* I/O counter: bytes written */
 	p->syscr = 0;		/* I/O counter: read syscalls */
 	p->syscw = 0;		/* I/O counter: write syscalls */
+	task_io_accounting_init(p);
 	acct_clear_integrals(p);
 
  	p->it_virt_expires = cputime_zero;
@@ -1042,8 +1088,13 @@ static task_t *copy_process(unsigned lon
 #endif
 
 	p->tgid = p->pid;
-	if (clone_flags & CLONE_THREAD)
+	set_virt_tgid(p, virt_pid(p));
+	set_virt_pgid(p, virt_pgid(current));
+	set_virt_sid(p, virt_sid(current));
+	if (clone_flags & CLONE_THREAD) {
 		p->tgid = current->tgid;
+		set_virt_tgid(p, virt_tgid(current));
+	}
 
 	if ((retval = security_task_alloc(p)))
 		goto bad_fork_cleanup_policy;
@@ -1140,14 +1191,14 @@ static task_t *copy_process(unsigned lon
 			unlikely(!cpus_subset(p->cpus_allowed,cpu_online_map)))
 		p->cpus_allowed = current->cpus_allowed;
 	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-			!cpu_online(task_cpu(p))))
-		set_task_cpu(p, smp_processor_id());
+			!vcpu_online(task_cpu(p))))
+		set_task_cpu(p, task_cpu(current));
 
 	/*
 	 * Check for pending SIGKILL! The new thread should not be allowed
 	 * to slip out of an OOM kill. (or normal SIGKILL.)
 	 */
-	if (sigismember(&current->pending.signal, SIGKILL)) {
+	if (sigismember(&current->pending.signal, SIGKILL) && !pid0) {
 		write_unlock_irq(&tasklist_lock);
 		retval = -EINTR;
 		goto bad_fork_cleanup_namespace;
@@ -1210,6 +1261,12 @@ static task_t *copy_process(unsigned lon
 	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
 
+	pget_ve(p->ve_task_info.owner_env);
+	get_ve(p->ve_task_info.owner_env);
+#ifdef CONFIG_VE
+	SET_VE_LINKS(p);
+	seqcount_init(&p->ve_task_info.wakeup_lock);
+#endif
 	if (thread_group_leader(p)) {
 		p->signal->tty = current->signal->tty;
 		p->signal->pgrp = process_group(current);
@@ -1258,6 +1315,11 @@ bad_fork_cleanup_cpuset:
 #endif
 	cpuset_exit(p);
 bad_fork_cleanup_delays_binfmt:
+#ifdef CONFIG_VE
+	if (virt_pid(p) != p->pid && virt_pid(p) > 0)
+		free_vpid(virt_pid(p), get_exec_env());
+bad_fork_cleanup_module:
+#endif
 	delayacct_tsk_free(p);
 	if (p->binfmt)
 		module_put(p->binfmt->module);
@@ -1268,6 +1330,9 @@ bad_fork_cleanup_count:
 	atomic_dec(&p->user->processes);
 	free_uid(p->user);
 bad_fork_free:
+	ub_task_uncharge(p);
+	ub_task_put(p);
+bad_fork_charge:
 	free_task(p);
 fork_out:
 	return ERR_PTR(retval);
@@ -1284,7 +1349,7 @@ task_t * __devinit fork_idle(int cpu)
 	task_t *task;
 	struct pt_regs regs;
 
-	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
+	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0, 0);
 	if (!task)
 		return ERR_PTR(-ENOMEM);
 	init_idle(task, cpu);
@@ -1314,26 +1379,33 @@ static inline int fork_traceflag (unsign
  * It copies the process, and if successful kick-starts
  * it and waits for it to finish using the VM if required.
  */
-long do_fork(unsigned long clone_flags,
+long do_fork_pid(unsigned long clone_flags,
 	      unsigned long stack_start,
 	      struct pt_regs *regs,
 	      unsigned long stack_size,
 	      int __user *parent_tidptr,
-	      int __user *child_tidptr)
+	      int __user *child_tidptr,
+	      long pid0)
 {
 	struct task_struct *p;
 	int trace = 0;
-	long pid = alloc_pidmap();
+	long pid;
 
+	pid = virtinfo_gencall(VIRTINFO_DOFORK, (void *)clone_flags);
+	if (pid)
+		return pid;
+
+	pid = alloc_pidmap();
 	if (pid < 0)
-		return -EAGAIN;
+		goto out;
 	if (unlikely(current->ptrace)) {
 		trace = fork_traceflag (clone_flags);
 		if (trace)
 			clone_flags |= CLONE_PTRACE;
 	}
 
-	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+	p = copy_process(clone_flags, stack_start, regs, stack_size,
+			parent_tidptr, child_tidptr, pid, pid0);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
@@ -1341,6 +1413,7 @@ long do_fork(unsigned long clone_flags,
 	if (!IS_ERR(p)) {
 		struct completion vfork;
 
+		pid = virt_pid(p);
 		if (clone_flags & CLONE_VFORK) {
 			p->vfork_done = &vfork;
 			init_completion(&vfork);
@@ -1354,6 +1427,7 @@ long do_fork(unsigned long clone_flags,
 			set_tsk_thread_flag(p, TIF_SIGPENDING);
 		}
 
+		(void)virtinfo_gencall(VIRTINFO_DOFORKRET, p);
 		if (!(clone_flags & CLONE_STOPPED))
 			wake_up_new_task(p, clone_flags);
 		else
@@ -1361,18 +1435,25 @@ long do_fork(unsigned long clone_flags,
 
 		if (unlikely (trace)) {
 			current->ptrace_message = pid;
+			set_pn_state(current, PN_STOP_FORK);
 			ptrace_notify ((trace << 8) | SIGTRAP);
+			clear_pn_state(current);
 		}
 
 		if (clone_flags & CLONE_VFORK) {
 			wait_for_completion(&vfork);
-			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
+			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
+				set_pn_state(current, PN_STOP_VFORK);
 				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
+				clear_pn_state(current);
+			}
 		}
 	} else {
 		free_pidmap(pid);
 		pid = PTR_ERR(p);
 	}
+out:
+	(void)virtinfo_gencall(VIRTINFO_DOFORKPOST, (void *)(long)pid);
 	return pid;
 }
 
@@ -1380,26 +1461,39 @@ long do_fork(unsigned long clone_flags,
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
 
+EXPORT_SYMBOL(do_fork_pid);
+
+long do_fork(unsigned long clone_flags,
+		unsigned long stack_start,
+		struct pt_regs *regs,
+		unsigned long stack_size,
+		int __user *parent_tidptr,
+		int __user *child_tidptr)
+{	/* thin wrapper: forwards to do_fork_pid() with pid0 == 0 */
+	return do_fork_pid(clone_flags, stack_start, regs, stack_size,
+			parent_tidptr, child_tidptr, 0);
+}
+
 void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	files_cachep = kmem_cache_create("files_cache", 
 			sizeof(struct files_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	fs_cachep = kmem_cache_create("fs_cache", 
 			sizeof(struct fs_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	vm_area_cachep = kmem_cache_create("vm_area_struct",
 			sizeof(struct vm_area_struct), 0,
-			SLAB_PANIC, NULL, NULL);
+			SLAB_PANIC|SLAB_UBC, NULL, NULL);
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 }
 
 
diff -upr linux-2.6.16.46-0.12.orig/kernel/hrtimer.c linux-2.6.16.46-0.12-027test011/kernel/hrtimer.c
--- linux-2.6.16.46-0.12.orig/kernel/hrtimer.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/hrtimer.c	2007-08-28 17:35:33.000000000 +0400
@@ -439,6 +439,7 @@ hrtimer_start(struct hrtimer *timer, kti
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(hrtimer_start);
 
 /**
  * hrtimer_try_to_cancel - try to deactivate a timer
@@ -467,6 +468,7 @@ int hrtimer_try_to_cancel(struct hrtimer
 	return ret;
 
 }
+EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
@@ -504,6 +506,7 @@ ktime_t hrtimer_get_remaining(const stru
 
 	return rem;
 }
+EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
 
 #ifdef CONFIG_NO_IDLE_HZ
 /**
@@ -675,7 +678,7 @@ void hrtimer_run_queues(void)
  * The current task state is guaranteed to be TASK_RUNNING when this
  * routine returns.
  */
-static ktime_t __sched
+ktime_t __sched
 schedule_hrtimer(struct hrtimer *timer, const enum hrtimer_mode mode)
 {
 	/* fn stays NULL, meaning single-shot wakeup: */
@@ -702,7 +705,7 @@ schedule_hrtimer_interruptible(struct hr
 	return schedule_hrtimer(timer, mode);
 }
 
-static long __sched nanosleep_restart(struct restart_block *restart)
+long __sched nanosleep_restart(struct restart_block *restart)
 {
 	struct timespec __user *rmtp;
 	struct timespec tu;
@@ -731,6 +734,7 @@ static long __sched nanosleep_restart(st
 	/* The other values in restart are already filled in */
 	return -ERESTART_RESTARTBLOCK;
 }
+EXPORT_SYMBOL_GPL(nanosleep_restart);
 
 long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		       const enum hrtimer_mode mode, const clockid_t clockid)
diff -upr linux-2.6.16.46-0.12.orig/kernel/irq/handle.c linux-2.6.16.46-0.12-027test011/kernel/irq/handle.c
--- linux-2.6.16.46-0.12.orig/kernel/irq/handle.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/kernel/irq/handle.c	2007-08-28 17:35:30.000000000 +0400
@@ -171,10 +171,10 @@ fastcall unsigned int __do_IRQ(unsigned 
 		spin_unlock(&desc->lock);
 
 		action_ret = handle_IRQ_event(irq, regs, action);
-
-		spin_lock(&desc->lock);
 		if (!noirqdebug)
 			note_interrupt(irq, desc, action_ret, regs);
+
+		spin_lock(&desc->lock);
 		if (likely(!(desc->status & IRQ_PENDING)))
 			break;
 		desc->status &= ~IRQ_PENDING;
diff -upr linux-2.6.16.46-0.12.orig/kernel/kmod.c linux-2.6.16.46-0.12-027test011/kernel/kmod.c
--- linux-2.6.16.46-0.12.orig/kernel/kmod.c	2007-08-24 19:28:08.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/kmod.c	2007-08-28 17:35:31.000000000 +0400
@@ -78,6 +78,10 @@ int request_module(const char *fmt, ...)
 #define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
 	static int kmod_loop_msg;
 
+	/* Don't allow request_module() inside VE. */
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	va_start(args, fmt);
 	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
 	va_end(args);
@@ -247,6 +251,9 @@ int call_usermodehelper_keys(char *path,
 	};
 	DECLARE_WORK(work, __call_usermodehelper, &sub_info);
 
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	if (!khelper_wq)
 		return -EBUSY;
 
diff -upr linux-2.6.16.46-0.12.orig/kernel/kthread.c linux-2.6.16.46-0.12-027test011/kernel/kthread.c
--- linux-2.6.16.46-0.12.orig/kernel/kthread.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/kernel/kthread.c	2007-08-28 17:35:31.000000000 +0400
@@ -114,7 +114,7 @@ static void keventd_create_kthread(void 
 		create->result = ERR_PTR(pid);
 	} else {
 		wait_for_completion(&create->started);
-		create->result = find_task_by_pid(pid);
+		create->result = find_task_by_pid_all(pid);
 	}
 	complete(&create->done);
 }
diff -upr linux-2.6.16.46-0.12.orig/kernel/module.c linux-2.6.16.46-0.12-027test011/kernel/module.c
--- linux-2.6.16.46-0.12.orig/kernel/module.c	2007-08-24 19:28:20.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/module.c	2007-08-28 17:35:31.000000000 +0400
@@ -2085,6 +2085,8 @@ static void *m_start(struct seq_file *m,
 	loff_t n = 0;
 
 	down(&module_mutex);
+	if (!ve_is_super(get_exec_env()))
+		return NULL;
 	list_for_each(i, &modules) {
 		if (n++ == *pos)
 			break;
diff -upr linux-2.6.16.46-0.12.orig/kernel/mutex-debug.c linux-2.6.16.46-0.12-027test011/kernel/mutex-debug.c
--- linux-2.6.16.46-0.12.orig/kernel/mutex-debug.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/kernel/mutex-debug.c	2007-08-28 17:35:31.000000000 +0400
@@ -193,12 +193,12 @@ retry:
 	if (count != 10)
 		printk(" locked it.\n");
 
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		show_task_locks(p);
 		if (!unlock)
 			if (read_trylock(&tasklist_lock))
 				unlock = 1;
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 	printk("\n");
 	show_held_locks(NULL);
diff -upr linux-2.6.16.46-0.12.orig/kernel/pagg.c linux-2.6.16.46-0.12-027test011/kernel/pagg.c
--- linux-2.6.16.46-0.12.orig/kernel/pagg.c	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/pagg.c	2007-08-28 17:35:31.000000000 +0400
@@ -175,7 +175,7 @@ remove_client_paggs_from_all_tasks(struc
 		struct task_struct *p = NULL;
 
 		read_lock(&tasklist_lock);
-		for_each_process(p) {
+		for_each_process_all(p) {
 			struct pagg *paggp;
 
 			get_task_struct(p);
@@ -275,7 +275,7 @@ pagg_hook_register(struct pagg_hook *pag
 			struct task_struct *p = NULL;
 
 			read_lock(&tasklist_lock);
-			for_each_process(p) {
+			for_each_process_all(p) {
 				struct pagg *paggp;
 
 				get_task_struct(p);
diff -upr linux-2.6.16.46-0.12.orig/kernel/panic.c linux-2.6.16.46-0.12-027test011/kernel/panic.c
--- linux-2.6.16.46-0.12.orig/kernel/panic.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/panic.c	2007-08-28 17:35:36.000000000 +0400
@@ -23,11 +23,20 @@
 int panic_timeout;
 int panic_on_oops;
 int tainted;
+int kernel_text_csum_broken;
 void (*dump_function_ptr)(const char *, const struct pt_regs *) = 0;
 
+EXPORT_SYMBOL(kernel_text_csum_broken);
 EXPORT_SYMBOL(panic_timeout);
 EXPORT_SYMBOL_GPL(dump_function_ptr);
 
+void check_kernel_csum_bug(void)
+{
+	if (kernel_text_csum_broken)
+		printk("Kernel code checksum mismatch detected %d times\n",
+				kernel_text_csum_broken);
+}
+
 struct notifier_block *panic_notifier_list;
 
 EXPORT_SYMBOL(panic_notifier_list);
@@ -181,7 +190,8 @@ const char *print_tainted(void)
 {
 	static char buf[20];
 	if (tainted) {
-		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c",
+		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c",
+			kernel_text_csum_broken ? 'B' : ' ',
 			tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
 			tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
 			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
diff -upr linux-2.6.16.46-0.12.orig/kernel/pid.c linux-2.6.16.46-0.12-027test011/kernel/pid.c
--- linux-2.6.16.46-0.12.orig/kernel/pid.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/kernel/pid.c	2007-08-28 17:35:36.000000000 +0400
@@ -27,12 +27,19 @@
 #include <linux/bootmem.h>
 #include <linux/hash.h>
 
+#ifdef CONFIG_VE
+static void __free_vpid(int vpid, struct ve_struct *ve);
+int glob_virt_pids = 1;
+EXPORT_SYMBOL(glob_virt_pids);
+#endif
+
 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
 static struct hlist_head *pid_hash[PIDTYPE_MAX];
 static int pidhash_shift;
 
 int pid_max = PID_MAX_DEFAULT;
 int last_pid;
+EXPORT_SYMBOL(last_pid);
 
 #define RESERVED_PIDS		300
 
@@ -57,8 +64,14 @@ typedef struct pidmap {
 	void *page;
 } pidmap_t;
 
+#ifdef CONFIG_VE
+#define PIDMAP_NRFREE (BITS_PER_PAGE/2)
+#else
+#define PIDMAP_NRFREE BITS_PER_PAGE
+#endif
+
 static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
-	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
+	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(PIDMAP_NRFREE), NULL } };
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
@@ -67,9 +80,13 @@ fastcall void free_pidmap(int pid)
 	pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
 	int offset = pid & BITS_PER_PAGE_MASK;
 
-	clear_bit(offset, map->page);
+	BUG_ON(__is_virtual_pid(pid) || pid == 1);
+
+	if (test_and_clear_bit(offset, map->page) == 0)
+		BUG();
 	atomic_inc(&map->nr_free);
 }
+EXPORT_SYMBOL_GPL(free_pidmap);
 
 int alloc_pidmap(void)
 {
@@ -77,6 +94,8 @@ int alloc_pidmap(void)
 	pidmap_t *map;
 
 	pid = last + 1;
+	if (__is_virtual_pid(pid))
+		pid += VPID_DIV;
 	if (pid >= pid_max)
 		pid = RESERVED_PIDS;
 	offset = pid & BITS_PER_PAGE_MASK;
@@ -106,6 +125,8 @@ int alloc_pidmap(void)
 					return pid;
 				}
 				offset = find_next_offset(map, offset);
+				if (__is_virtual_pid(offset))
+					offset += VPID_DIV;
 				pid = mk_pid(map, offset);
 			/*
 			 * find_next_offset() found a bit, the pid from it
@@ -130,6 +151,7 @@ int alloc_pidmap(void)
 	}
 	return -1;
 }
+EXPORT_SYMBOL_GPL(alloc_pidmap);
 
 struct pid * fastcall find_pid(enum pid_type type, int nr)
 {
@@ -143,6 +165,7 @@ struct pid * fastcall find_pid(enum pid_
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(find_pid);
 
 int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 {
@@ -162,6 +185,7 @@ int fastcall attach_pid(task_t *task, en
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(attach_pid);
 
 static fastcall int __detach_pid(task_t *task, enum pid_type type)
 {
@@ -201,13 +225,27 @@ void fastcall detach_pid(task_t *task, e
 		if (tmp != type && find_pid(tmp, nr))
 			return;
 
+#ifdef CONFIG_VE
+	__free_vpid(task->pids[type].vnr, VE_TASK_INFO(task)->owner_env);
+#endif
 	free_pidmap(nr);
 }
+EXPORT_SYMBOL_GPL(detach_pid);
 
 task_t *find_task_by_pid_type(int type, int nr)
 {
+	BUG();
+	return NULL;
+}
+
+EXPORT_SYMBOL(find_task_by_pid_type);
+
+task_t *find_task_by_pid_type_all(int type, int nr)
+{
 	struct pid *pid;
 
+	BUG_ON(nr != -1 && is_virtual_pid(nr));
+
 	pid = find_pid(type, nr);
 	if (!pid)
 		return NULL;
@@ -215,7 +253,35 @@ task_t *find_task_by_pid_type(int type, 
 	return pid_task(&pid->pid_list, type);
 }
 
-EXPORT_SYMBOL(find_task_by_pid_type);
+EXPORT_SYMBOL(find_task_by_pid_type_all);
+
+#ifdef CONFIG_VE
+
+task_t *find_task_by_pid_type_ve(int type, int nr)
+{
+	task_t *tsk;
+	int gnr = nr;
+	struct pid *pid;
+
+	if (is_virtual_pid(nr)) {
+		gnr = __vpid_to_pid(nr);
+		if (unlikely(gnr == -1))
+			return NULL;
+	}
+
+	pid = find_pid(type, gnr);
+	if (!pid)
+		return NULL;
+
+	tsk = pid_task(&pid->pid_list, type);
+	if (!ve_accessible(VE_TASK_INFO(tsk)->owner_env, get_exec_env()))
+		return NULL;
+	return tsk;
+}
+
+EXPORT_SYMBOL(find_task_by_pid_type_ve);
+
+#endif
 
 /*
  * This function switches the PIDs if a non-leader thread calls
@@ -234,12 +300,16 @@ void switch_exec_pids(task_t *leader, ta
 
 	leader->pid = leader->tgid = thread->pid;
 	thread->pid = thread->tgid;
+	set_virt_tgid(leader, virt_pid(thread));
+	set_virt_pid(leader, virt_pid(thread));
+	set_virt_pid(thread, virt_tgid(thread));
 
 	attach_pid(thread, PIDTYPE_PID, thread->pid);
 	attach_pid(thread, PIDTYPE_TGID, thread->tgid);
 	attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
 	attach_pid(thread, PIDTYPE_SID, thread->signal->session);
 	list_add_tail(&thread->tasks, &init_task.tasks);
+	SET_VE_LINKS(thread);
 
 	attach_pid(leader, PIDTYPE_PID, leader->pid);
 	attach_pid(leader, PIDTYPE_TGID, leader->tgid);
@@ -247,6 +317,367 @@ void switch_exec_pids(task_t *leader, ta
 	attach_pid(leader, PIDTYPE_SID, leader->signal->session);
 }
 
+#ifdef CONFIG_VE
+
+/* Virtual PID bits.
+ *
+ * At the moment all internal kernel structures store the real global pid.
+ * The only place where virtual pids are used is the user-facing frontend:
+ * we remap virtual pids obtained from the user to global ones (vpid_to_pid)
+ * and map globals to virtuals before showing them to user (virt_pid_type).
+ *
+ * We hold virtual pids inside struct pid, so mapping global -> virtual is easy.
+ */
+
+pid_t _pid_type_to_vpid(int type, pid_t pid)
+{
+	struct pid * p;
+
+	if (unlikely(is_virtual_pid(pid)))
+		return -1;
+
+	read_lock(&tasklist_lock);
+	p = find_pid(type, pid);
+	if (p) {
+		pid = p->vnr;
+	} else {
+		pid = -1;
+	}
+	read_unlock(&tasklist_lock);
+	return pid;
+}
+EXPORT_SYMBOL_GPL(_pid_type_to_vpid);
+
+pid_t pid_type_to_vpid(int type, pid_t pid)
+{
+	int vpid;
+
+	if (unlikely(pid <= 0))
+		return pid;
+
+	BUG_ON(is_virtual_pid(pid));
+
+	if (ve_is_super(get_exec_env()))
+		return pid;
+
+	vpid = _pid_type_to_vpid(type, pid);
+	if (unlikely(vpid == -1)) {
+		/* This is allowed: a global pid can be used everywhere.
+		 * It can happen when the kernel remembers stray pids:
+		 * signal queues, locks, etc.
+		 */
+		vpid = pid;
+	}
+	return vpid;
+}
+EXPORT_SYMBOL_GPL(pid_type_to_vpid);
+
+/* To map virtual pids to global ones we maintain a special hash table.
+ *
+ * Mapping entries are allocated when a process with a non-trivial
+ * mapping is forked, which is possible only after the VE has migrated.
+ * Mappings are destroyed when a global pid is removed from the global
+ * pidmap, which means we do not need to refcount mappings.
+ */
+
+static struct hlist_head *vpid_hash;
+
+struct vpid_mapping
+{
+	int	vpid;
+	int	veid;
+	int	pid;
+	struct hlist_node link;
+	struct rcu_head rcu;
+};
+
+static kmem_cache_t *vpid_mapping_cachep;
+
+static inline int vpid_hashfn(int vnr, int veid)
+{
+	return hash_long((unsigned long)(vnr+(veid<<16)), pidhash_shift);
+}
+
+struct vpid_mapping *__lookup_vpid_mapping(int vnr, int veid)
+{
+	struct hlist_node *elem;
+	struct vpid_mapping *map;
+
+	hlist_for_each_entry_rcu(map, elem,
+			&vpid_hash[vpid_hashfn(vnr, veid)], link) {
+		if (map->vpid == vnr && map->veid == veid)
+			return map;
+	}
+	return NULL;
+}
+
+/* __vpid_to_pid() is the raw version of vpid_to_pid(). It is to be used
+ * only under tasklist_lock. In some places we must use only this version
+ * (e.g. __kill_pg_info is called under the write lock!)
+ *
+ * The caller should pass a virtual pid. This function returns -1 when
+ * given a global pid.
+ */
+int __vpid_to_pid(int pid)
+{
+	struct vpid_mapping *map;
+
+	if (unlikely(!is_virtual_pid(pid) || ve_is_super(get_exec_env())))
+		return -1;
+
+	if (!get_exec_env()->sparse_vpid) {
+		int init_pid;
+
+		init_pid = get_exec_env()->init_entry->pid;
+		if (pid == 1)
+			return init_pid;
+		if (pid == init_pid + VPID_DIV)
+			return -1; /* vpid of init is 1 */
+		return pid - VPID_DIV;
+	}
+
+	map = __lookup_vpid_mapping(pid, VEID(get_exec_env()));
+	if (map)
+		return map->pid;
+	return -1;
+}
+EXPORT_SYMBOL_GPL(__vpid_to_pid);
+
+int vpid_to_pid(int pid)
+{
+	/* User gave bad pid. It is his problem. */
+	if (unlikely(pid <= 0))
+		return pid;
+
+	if (!is_virtual_pid(pid))
+		return pid;
+
+	read_lock(&tasklist_lock);
+	pid = __vpid_to_pid(pid);
+	read_unlock(&tasklist_lock);
+	return pid;
+}
+EXPORT_SYMBOL_GPL(vpid_to_pid);
+
+/* VEs which never migrated have a trivial "arithmetic" mapping vpid -> pid:
+ *
+ * vpid == 1 -> ve->init_entry->pid
+ * else	        vpid - VPID_DIV
+ *
+ * In this case the VE has ve->sparse_vpid = 0 and the vpid hash table is unused.
+ *
+ * When the VE migrates and we see a non-trivial mapping for the first time,
+ * we scan the process table and populate the mapping hash table.
+ */
+
+static int add_mapping(int pid, int vpid, int veid, struct hlist_head *cache)
+{
+	if (unlikely(pid <= 0 || vpid <= 0))	/* nothing to map */
+		return 0;
+
+	/* A VE can contain non-virtual (VE_ENTER'ed) processes when
+	 * switching to sparse mapping. We must not create mappings
+	 * for them. */
+	if (unlikely(!__is_virtual_pid(vpid) && vpid != 1)) {
+		printk("DEBUG (do not worry, but report): non-virtual pid while switching mode %d %d\n", pid, vpid);
+		return 0;
+	}
+
+	if (!__lookup_vpid_mapping(vpid, veid)) {	/* not hashed yet */
+		struct vpid_mapping *m;
+		if (hlist_empty(cache)) {	/* preallocated pool exhausted */
+			m = kmem_cache_alloc(vpid_mapping_cachep, GFP_ATOMIC);
+			if (unlikely(m == NULL))
+				return -ENOMEM;
+		} else {	/* take a preallocated entry off the cache list */
+			m = hlist_entry(cache->first, struct vpid_mapping, link);
+			hlist_del_rcu(&m->link);
+		}
+		m->pid = pid;
+		m->vpid = vpid;
+		m->veid = veid;
+		hlist_add_head_rcu(&m->link,
+			       &vpid_hash[vpid_hashfn(vpid, veid)]);
+	}
+	return 0;
+}
+
+static int switch_to_sparse_mapping(int pid)
+{
+	struct ve_struct *env = get_exec_env();
+	struct hlist_head cache;
+	task_t *g, *t;
+	int pcount;
+	int err;
+
+	/* The transition happens under write_lock_irq, so we try to make
+	 * it faster and more reliable by preallocating mapping entries.
+	 * pcounter entries may not be enough: there could be many orphaned
+	 * process groups and sessions, which also require mappings.
+	 */
+	INIT_HLIST_HEAD(&cache);
+	pcount = atomic_read(&env->pcounter);
+	err = -ENOMEM;
+	while (pcount > 0) {
+		struct vpid_mapping *m;
+		m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
+		if (!m)
+			goto out;
+		hlist_add_head(&m->link, &cache);
+		pcount--;
+	}
+
+	write_lock_irq(&tasklist_lock);
+	err = 0;
+	if (env->sparse_vpid)
+		goto out_unlock;
+
+	err = -ENOMEM;
+	do_each_thread_ve(g, t) {
+		if (t->pid == pid)
+			continue;
+		if (add_mapping(t->pid, virt_pid(t), VEID(env), &cache))
+			goto out_unlock;
+	} while_each_thread_ve(g, t);
+
+	for_each_process_ve(t) {
+		if (t->pid == pid)
+			continue;
+
+		if (add_mapping(t->tgid, virt_tgid(t), VEID(env), &cache))
+			goto out_unlock;
+		if (add_mapping(t->signal->pgrp, virt_pgid(t), VEID(env), &cache))
+			goto out_unlock;
+		if (add_mapping(t->signal->session, virt_sid(t), VEID(env), &cache))
+			goto out_unlock;
+	}
+	env->sparse_vpid = 1;
+	err = 0;
+
+out_unlock:
+	if (err) {
+		int i;
+
+		for (i=0; i<(1<<pidhash_shift); i++) {
+			struct hlist_node *elem, *next;
+			struct vpid_mapping *map;
+
+			hlist_for_each_entry_safe(map, elem, next, &vpid_hash[i], link) {
+				if (map->veid == VEID(env)) {
+					hlist_del(elem);
+					hlist_add_head(elem, &cache);
+				}
+			}
+		}
+	}
+	write_unlock_irq(&tasklist_lock);
+
+out:
+	while (!hlist_empty(&cache)) {
+		struct vpid_mapping *m;
+		m = hlist_entry(cache.first, struct vpid_mapping, link);
+		hlist_del_rcu(&m->link);
+		kmem_cache_free(vpid_mapping_cachep, m);
+	}
+	return err;
+}
+
+int alloc_vpid(int pid, int virt_pid)
+{
+	int result;
+	struct vpid_mapping *m;
+	struct ve_struct *env = get_exec_env();
+
+	if (ve_is_super(env) || !env->virt_pids)	/* pids are not virtualized */
+		return pid;
+
+	if (!env->sparse_vpid) {
+		if (virt_pid == -1)	/* no vpid requested: arithmetic mapping */
+			return pid + VPID_DIV;
+
+		if (virt_pid == 1 || virt_pid == pid + VPID_DIV)
+			return virt_pid;
+
+		if ((result = switch_to_sparse_mapping(pid)) < 0)
+			return result;
+	}
+
+	m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
+	if (!m)
+		return -ENOMEM;
+
+	m->pid = pid;
+	m->veid = VEID(env);
+
+	result = (virt_pid == -1) ? pid + VPID_DIV : virt_pid;
+
+	write_lock_irq(&tasklist_lock);
+	if (unlikely(__lookup_vpid_mapping(result, m->veid))) {
+		if (virt_pid > 0) {	/* an explicitly requested vpid is taken */
+			result = -EEXIST;
+			goto out;
+		}
+
+		/* No luck. Search for a vpid that is not mapped yet.
+		 * This is a weak spot: the search is linear. */
+		do {
+			result++;
+			if (!__is_virtual_pid(result))
+				result += VPID_DIV;
+			if (result >= pid_max)
+				result = RESERVED_PIDS + VPID_DIV;
+		} while (__lookup_vpid_mapping(result, m->veid) != NULL);
+
+		/* Advance last_pid past the chosen value, in the hope that
+		 * future alloc_pidmap() calls will avoid such collisions. */
+		last_pid = result - VPID_DIV;
+	}
+	if (result > 0) {
+		m->vpid = result;
+		hlist_add_head_rcu(&m->link,
+			       &vpid_hash[vpid_hashfn(result, m->veid)]);
+	}
+out:
+	write_unlock_irq(&tasklist_lock);
+	if (result <= 0)	/* m was never hashed (error or vpid 0): do not leak it */
+		kmem_cache_free(vpid_mapping_cachep, m);
+	return result;
+}
+EXPORT_SYMBOL(alloc_vpid);
+
+static void vpid_free_cb(struct rcu_head *rhp)
+{
+	struct vpid_mapping *m;
+
+	m = container_of(rhp, struct vpid_mapping, rcu);
+	kmem_cache_free(vpid_mapping_cachep, m);
+}
+
+static void __free_vpid(int vpid, struct ve_struct *ve)
+{
+	struct vpid_mapping *m;
+
+	if (!ve->sparse_vpid)
+		return;
+
+	if (!__is_virtual_pid(vpid) && (vpid != 1 || ve_is_super(ve)))
+		return;
+
+	m = __lookup_vpid_mapping(vpid, ve->veid);
+	BUG_ON(m == NULL);
+	hlist_del_rcu(&m->link);
+	call_rcu(&m->rcu, vpid_free_cb);
+}
+
+void free_vpid(int vpid, struct ve_struct *ve)
+{
+	write_lock_irq(&tasklist_lock);
+	__free_vpid(vpid, ve);
+	write_unlock_irq(&tasklist_lock);
+}
+EXPORT_SYMBOL(free_vpid);
+#endif
+
 /*
  * The pid hash table is scaled according to the amount of memory in the
  * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
@@ -273,6 +704,14 @@ void __init pidhash_init(void)
 		for (j = 0; j < pidhash_size; j++)
 			INIT_HLIST_HEAD(&pid_hash[i][j]);
 	}
+
+#ifdef CONFIG_VE
+	vpid_hash = alloc_bootmem(pidhash_size * sizeof(struct hlist_head));
+	if (!vpid_hash)
+		panic("Could not alloc vpid_hash!\n");
+	for (j = 0; j < pidhash_size; j++)
+		INIT_HLIST_HEAD(&vpid_hash[j]);
+#endif
 }
 
 void __init pidmap_init(void)
@@ -289,4 +728,12 @@ void __init pidmap_init(void)
 
 	for (i = 0; i < PIDTYPE_MAX; i++)
 		attach_pid(current, i, 0);
+
+#ifdef CONFIG_VE
+	vpid_mapping_cachep =
+		kmem_cache_create("vpid_mapping",
+				  sizeof(struct vpid_mapping),
+				  __alignof__(struct vpid_mapping),
+				  SLAB_PANIC|SLAB_UBC, NULL, NULL);
+#endif
 }
diff -upr linux-2.6.16.46-0.12.orig/kernel/posix-cpu-timers.c linux-2.6.16.46-0.12-027test011/kernel/posix-cpu-timers.c
--- linux-2.6.16.46-0.12.orig/kernel/posix-cpu-timers.c	2007-08-24 19:28:08.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/posix-cpu-timers.c	2007-08-28 17:35:31.000000000 +0400
@@ -20,7 +20,7 @@ static int check_clock(const clockid_t w
 		return 0;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p || (CPUCLOCK_PERTHREAD(which_clock) ?
 		   p->tgid != current->tgid : p->tgid != pid)) {
 		error = -EINVAL;
@@ -305,7 +305,7 @@ int posix_cpu_clock_get(const clockid_t 
 		 */
 		struct task_struct *p;
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 		if (p) {
 			if (CPUCLOCK_PERTHREAD(which_clock)) {
 				if (p->tgid == current->tgid) {
@@ -349,7 +349,7 @@ int posix_cpu_timer_create(struct k_itim
 		if (pid == 0) {
 			p = current;
 		} else {
-			p = find_task_by_pid(pid);
+			p = find_task_by_pid_ve(pid);
 			if (p && p->tgid != current->tgid)
 				p = NULL;
 		}
@@ -357,7 +357,7 @@ int posix_cpu_timer_create(struct k_itim
 		if (pid == 0) {
 			p = current->group_leader;
 		} else {
-			p = find_task_by_pid(pid);
+			p = find_task_by_pid_ve(pid);
 			if (p && p->tgid != pid)
 				p = NULL;
 		}
diff -upr linux-2.6.16.46-0.12.orig/kernel/posix-timers.c linux-2.6.16.46-0.12-027test011/kernel/posix-timers.c
--- linux-2.6.16.46-0.12.orig/kernel/posix-timers.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/kernel/posix-timers.c	2007-08-28 17:35:31.000000000 +0400
@@ -31,6 +31,7 @@
  * POSIX clocks & timers
  */
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
@@ -48,6 +49,8 @@
 #include <linux/workqueue.h>
 #include <linux/module.h>
 
+#include <ub/beancounter.h>
+
 /*
  * Management arrays for POSIX timers.	 Timers are kept in slab memory
  * Timer ids are allocated by an external routine that keeps track of the
@@ -241,7 +244,8 @@ static __init int init_posix_timers(void
 	register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
 
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
-					sizeof (struct k_itimer), 0, 0, NULL, NULL);
+					sizeof (struct k_itimer), 0,
+					SLAB_UBC, NULL, NULL);
 	idr_init(&posix_timers_id);
 	return 0;
 }
@@ -294,6 +298,13 @@ void do_schedule_next_timer(struct sigin
 
 int posix_timer_event(struct k_itimer *timr,int si_private)
 {
+	int ret;
+	struct ve_struct *ve;
+	struct user_beancounter *ub;
+
+	ve = set_exec_env(timr->it_process->ve_task_info.owner_env);
+	ub = set_exec_ub(timr->it_process->task_bc.task_ub);
+
 	memset(&timr->sigq->info, 0, sizeof(siginfo_t));
 	timr->sigq->info.si_sys_private = si_private;
 	/* Send signal to the process that owns this timer.*/
@@ -306,11 +317,11 @@ int posix_timer_event(struct k_itimer *t
 
 	if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
 		struct task_struct *leader;
-		int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
+		ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
 					timr->it_process);
 
 		if (likely(ret >= 0))
-			return ret;
+			goto out;
 
 		timr->it_sigev_notify = SIGEV_SIGNAL;
 		leader = timr->it_process->group_leader;
@@ -318,8 +329,12 @@ int posix_timer_event(struct k_itimer *t
 		timr->it_process = leader;
 	}
 
-	return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
+	ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
 				   timr->it_process);
+out:
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(ve);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(posix_timer_event);
 
@@ -366,7 +381,7 @@ static struct task_struct * good_sigeven
 	struct task_struct *rtn = current->group_leader;
 
 	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
-		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
+		(!(rtn = find_task_by_pid_ve(event->sigev_notify_thread_id)) ||
 		 rtn->tgid != current->tgid ||
 		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
 		return NULL;
diff -upr linux-2.6.16.46-0.12.orig/kernel/power/Kconfig linux-2.6.16.46-0.12-027test011/kernel/power/Kconfig
--- linux-2.6.16.46-0.12.orig/kernel/power/Kconfig	2007-08-24 19:28:07.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/power/Kconfig	2007-08-28 17:35:33.000000000 +0400
@@ -38,7 +38,7 @@ config PM_DEBUG
 
 config SOFTWARE_SUSPEND
 	bool "Software Suspend"
-	depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)
+	depends on PM && SWAP && X86 || ((FRV || PPC32) && !SMP)
 	---help---
 	  Enable the possibility of suspending the machine.
 	  It doesn't need APM.
diff -upr linux-2.6.16.46-0.12.orig/kernel/power/process.c linux-2.6.16.46-0.12-027test011/kernel/power/process.c
--- linux-2.6.16.46-0.12.orig/kernel/power/process.c	2007-08-24 19:28:06.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/power/process.c	2007-08-28 17:35:33.000000000 +0400
@@ -18,6 +18,7 @@
  */
 #define TIMEOUT	(6 * HZ)
 
+extern atomic_t global_suspend;
 
 static inline int freezeable(struct task_struct * p)
 {
@@ -30,29 +31,6 @@ static inline int freezeable(struct task
 	return 1;
 }
 
-/* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
-{
-	/* Hmm, should we be allowed to suspend when there are realtime
-	   processes around? */
-	long save;
-	save = current->state;
-	pr_debug("%s entered refrigerator\n", current->comm);
-	printk("=");
-
-	frozen_process(current);
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
-	spin_unlock_irq(&current->sighand->siglock);
-
-	while (frozen(current)) {
-		current->state = TASK_UNINTERRUPTIBLE;
-		schedule();
-	}
-	pr_debug("%s left refrigerator\n", current->comm);
-	current->state = save;
-}
-
 /* 0 = success, else # of processes that we failed to stop */
 int freeze_processes(void)
 {
@@ -61,12 +39,13 @@ int freeze_processes(void)
 	struct task_struct *g, *p;
 	unsigned long flags;
 
+	atomic_inc(&global_suspend);
 	printk( "Stopping tasks: " );
 	start_time = jiffies;
 	do {
 		todo = 0;
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
+		do_each_thread_all(g, p) {
 			if (!freezeable(p))
 				continue;
 			if (frozen(p))
@@ -77,7 +56,7 @@ int freeze_processes(void)
 			signal_wake_up(p, 0);
 			spin_unlock_irqrestore(&p->sighand->siglock, flags);
 			todo++;
-		} while_each_thread(g, p);
+		} while_each_thread_all(g, p);
 		read_unlock(&tasklist_lock);
 		yield();			/* Yield is okay here */
 		if (todo && time_after(jiffies, start_time + TIMEOUT)) {
@@ -87,6 +66,8 @@ int freeze_processes(void)
 		}
 	} while(todo);
 
+	atomic_dec(&global_suspend);
+
 	/* This does not unfreeze processes that are already frozen
 	 * (we have slightly ugly calling convention in that respect,
 	 * and caller must call thaw_processes() if something fails),
@@ -94,20 +75,20 @@ int freeze_processes(void)
 	 */
 	if (todo) {
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p)
+		do_each_thread_all(g, p)
 			if (freezing(p)) {
 				pr_debug("  clean up: %s\n", p->comm);
-				p->flags &= ~PF_FREEZE;
 				spin_lock_irqsave(&p->sighand->siglock, flags);
+				clear_tsk_thread_flag(p, TIF_FREEZE);
 				recalc_sigpending_tsk(p);
 				spin_unlock_irqrestore(&p->sighand->siglock, flags);
 			}
-		while_each_thread(g, p);
+		while_each_thread_all(g, p);
 		read_unlock(&tasklist_lock);
 		return todo;
 	}
 
-	printk( "|\n" );
+	/* printk( "|\n" ); */
 	BUG_ON(in_atomic());
 	return 0;
 }
@@ -118,16 +99,14 @@ void thaw_processes(void)
 
 	printk( "Restarting tasks..." );
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		if (!freezeable(p))
 			continue;
 		if (!thaw_process(p))
 			printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 	read_unlock(&tasklist_lock);
 	schedule();
 	printk( " done\n" );
 }
-
-EXPORT_SYMBOL(refrigerator);
diff -upr linux-2.6.16.46-0.12.orig/kernel/printk.c linux-2.6.16.46-0.12-027test011/kernel/printk.c
--- linux-2.6.16.46-0.12.orig/kernel/printk.c	2007-08-24 19:28:22.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/printk.c	2007-08-28 17:35:32.000000000 +0400
@@ -30,7 +30,9 @@
 #include <linux/smp.h>
 #include <linux/security.h>
 #include <linux/bootmem.h>
+#include <linux/vzratelimit.h>
 #include <linux/syscalls.h>
+#include <linux/veprintk.h>
 
 #include <asm/uaccess.h>
 
@@ -54,6 +56,9 @@ int console_printk[4] = {
 
 EXPORT_SYMBOL(console_printk);
 
+struct printk_aligned printk_no_wake_var[NR_CPUS];
+EXPORT_SYMBOL(printk_no_wake_var);
+
 /*
  * Low lever drivers may need that to know if they can schedule in
  * their unblank() callback or not. So let's export it.
@@ -83,7 +88,7 @@ static int console_locked;
  * It is also used in interesting ways to provide interlocking in
  * release_console_sem().
  */
-static DEFINE_SPINLOCK(logbuf_lock);
+DEFINE_SPINLOCK(logbuf_lock);
 
 #define LOG_BUF_MASK	(log_buf_len-1)
 #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
@@ -223,6 +228,9 @@ int do_syslog(int type, char __user *buf
 	char c;
 	int error = 0;
 
+	if (!ve_is_super(get_exec_env()) && (type == 6 || type == 7))
+		goto out;
+
 	error = security_syslog(type);
 	if (error)
 		return error;
@@ -243,15 +251,15 @@ int do_syslog(int type, char __user *buf
 			error = -EFAULT;
 			goto out;
 		}
-		error = wait_event_interruptible(log_wait,
-							(log_start - log_end));
+		error = wait_event_interruptible(ve_log_wait,
+						(ve_log_start - ve_log_end));
 		if (error)
 			goto out;
 		i = 0;
 		spin_lock_irq(&logbuf_lock);
-		while (!error && (log_start != log_end) && i < len) {
-			c = LOG_BUF(log_start);
-			log_start++;
+		while (!error && (ve_log_start != ve_log_end) && i < len) {
+			c = VE_LOG_BUF(ve_log_start);
+			ve_log_start++;
 			spin_unlock_irq(&logbuf_lock);
 			error = __put_user(c,buf);
 			buf++;
@@ -277,15 +285,17 @@ int do_syslog(int type, char __user *buf
 			error = -EFAULT;
 			goto out;
 		}
+		if (ve_log_buf == NULL)
+			goto out;
 		count = len;
-		if (count > log_buf_len)
-			count = log_buf_len;
+		if (count > ve_log_buf_len)
+			count = ve_log_buf_len;
 		spin_lock_irq(&logbuf_lock);
-		if (count > logged_chars)
-			count = logged_chars;
+		if (count > ve_logged_chars)
+			count = ve_logged_chars;
 		if (do_clear)
-			logged_chars = 0;
-		limit = log_end;
+			ve_logged_chars = 0;
+		limit = ve_log_end;
 		/*
 		 * __put_user() could sleep, and while we sleep
 		 * printk() could overwrite the messages
@@ -294,9 +304,9 @@ int do_syslog(int type, char __user *buf
 		 */
 		for (i = 0; i < count && !error; i++) {
 			j = limit-1-i;
-			if (j + log_buf_len < log_end)
+			if (j + ve_log_buf_len < ve_log_end)
 				break;
-			c = LOG_BUF(j);
+			c = VE_LOG_BUF(j);
 			spin_unlock_irq(&logbuf_lock);
 			error = __put_user(c,&buf[count-1-i]);
 			cond_resched();
@@ -320,7 +330,7 @@ int do_syslog(int type, char __user *buf
 		}
 		break;
 	case 5:		/* Clear ring buffer */
-		logged_chars = 0;
+		ve_logged_chars = 0;
 		break;
 	case 6:		/* Disable logging to console */
 		console_loglevel = minimum_console_loglevel;
@@ -332,16 +342,19 @@ int do_syslog(int type, char __user *buf
 		error = -EINVAL;
 		if (len < 1 || len > 8)
 			goto out;
+		error = 0;
+		/* VE has no console, so return success */
+		if (!ve_is_super(get_exec_env()))
+			goto out;
 		if (len < minimum_console_loglevel)
 			len = minimum_console_loglevel;
 		console_loglevel = len;
-		error = 0;
 		break;
 	case 9:		/* Number of chars in the log buffer */
-		error = log_end - log_start;
+		error = ve_log_end - ve_log_start;
 		break;
 	case 10:	/* Size of the log buffer */
-		error = log_buf_len;
+		error = ve_log_buf_len;
 		break;
 	default:
 		error = -EINVAL;
@@ -468,16 +481,18 @@ static void call_console_drivers(unsigne
 
 static void emit_log_char(char c)
 {
-	LOG_BUF(log_end) = c;
-	log_end++;
-	if (log_end - log_start > log_buf_len)
-		log_start = log_end - log_buf_len;
-	if (log_end - con_start > log_buf_len)
-		con_start = log_end - log_buf_len;
-	if (logged_chars < log_buf_len)
-		logged_chars++;
+	VE_LOG_BUF(ve_log_end) = c;
+	ve_log_end++;
+	if (ve_log_end - ve_log_start > ve_log_buf_len)
+		ve_log_start = ve_log_end - ve_log_buf_len;
+	if (ve_is_super(get_exec_env()) && ve_log_end - con_start > ve_log_buf_len)
+		con_start = ve_log_end - ve_log_buf_len;
+	if (ve_logged_chars < ve_log_buf_len)
+		ve_logged_chars++;
 }
 
+static unsigned long do_release_console_sem(unsigned long *flags);
+
 /*
  * Zap console related locks when oopsing. Only zap at most once
  * every 10 seconds, to leave time for slow consoles to print a
@@ -540,6 +555,30 @@ __attribute__((weak)) unsigned long long
  * printf(3)
  */
 
+/* Set up the current VE's log buffer on first use; returns 0 or -ENOMEM. */
+static inline int ve_log_init(void)
+{
+#ifdef CONFIG_VE
+	if (ve_log_buf != NULL)
+		return 0;
+
+	if (ve_is_super(get_exec_env())) {
+		ve0._log_wait = &log_wait;
+		ve0._log_start = &log_start;
+		ve0._log_end = &log_end;
+		ve0._logged_chars = &logged_chars;
+		ve0.log_buf = log_buf;
+		return 0;
+	}
+
+	/* called from __vprintk() with logbuf_lock held: must not sleep */
+	ve_log_buf = kzalloc(ve_log_buf_len, GFP_ATOMIC);
+	if (!ve_log_buf)
+		return -ENOMEM;
+#endif
+	return 0;
+}
+
 asmlinkage int printk(const char *fmt, ...)
 {
 	va_list args;
@@ -555,13 +594,14 @@ asmlinkage int printk(const char *fmt, .
 /* cpu currently holding logbuf_lock */
 static volatile unsigned int printk_cpu = UINT_MAX;
 
-asmlinkage int vprintk(const char *fmt, va_list args)
+asmlinkage int __vprintk(const char *fmt, va_list args)
 {
 	unsigned long flags;
 	int printed_len;
 	char *p;
 	static char printk_buf[1024];
 	static int new_line = 1;
+	int err, need_wake;
 
 	preempt_disable();
 	if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
@@ -573,6 +613,12 @@ asmlinkage int vprintk(const char *fmt, 
 	spin_lock_irqsave(&logbuf_lock, flags);
 	printk_cpu = smp_processor_id();
 
+	if ((err = ve_log_init()) != 0) {
+		printk_cpu = UINT_MAX;	/* this CPU no longer holds logbuf_lock */
+		spin_unlock_irqrestore(&logbuf_lock, flags);
+		preempt_enable();	/* pairs with preempt_disable() above */
+		return err;
+	}
 	/* Emit the output into the temporary buffer */
 	printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
 
@@ -630,7 +676,26 @@ asmlinkage int vprintk(const char *fmt, 
 		spin_unlock_irqrestore(&logbuf_lock, flags);
 		goto out;
 	}
-	if (!down_trylock(&console_sem)) {
+	if (__printk_no_wake) {
+		/*
+		 * A difficult case, created by the console semaphore mess...
+		 * All wakeups are omitted.
+		 */
+		if (!atomic_add_negative(-1, &console_sem.count)) {
+			console_locked = 1;
+			console_may_schedule = 0;
+			do_release_console_sem(&flags);
+			console_locked = 0;
+			console_may_schedule = 0;
+		}
+		atomic_inc(&console_sem.count);
+		spin_unlock_irqrestore(&logbuf_lock, flags);
+	} else if (!ve_is_super(get_exec_env())) {
+		need_wake = (ve_log_start != ve_log_end);
+		spin_unlock_irqrestore(&logbuf_lock, flags);
+		if (!oops_in_progress && need_wake)
+			wake_up_interruptible(&ve_log_wait);
+	} else if (!down_trylock(&console_sem)) {
 		console_locked = 1;
 		/*
 		 * We own the drivers.  We can drop the spinlock and let
@@ -656,6 +721,59 @@ out:
 EXPORT_SYMBOL(printk);
 EXPORT_SYMBOL(vprintk);
 
+static struct timer_list conswakeup_timer;
+static void conswakeup_timer_call(unsigned long dumy)
+{
+	if (!down_trylock(&console_sem)) {
+		console_locked = 1;
+		console_may_schedule = 0;
+		release_console_sem();
+	}
+	mod_timer(&conswakeup_timer, jiffies + 5 * HZ);
+}
+
+static int __init conswakeup_init(void)
+{
+	init_timer(&conswakeup_timer);
+	conswakeup_timer.function = &conswakeup_timer_call;
+	conswakeup_timer.expires = jiffies + 5 * HZ;
+	add_timer(&conswakeup_timer);
+	return 0;
+}
+console_initcall(conswakeup_init);
+
+asmlinkage int vprintk(const char *fmt, va_list args)
+{
+	int i;
+	struct ve_struct *env;
+
+	env = set_exec_env(get_ve0());
+	i = __vprintk(fmt, args);
+	set_exec_env(env);
+	return i;
+}
+
+asmlinkage int ve_printk(int dst, const char *fmt, ...)
+{
+	int len = 0;
+	va_list ap;
+
+	/* VE0 callers always log to the host; a VE must ask via VE0_LOG */
+	if (ve_is_super(get_exec_env()) || (dst & VE0_LOG)) {
+		va_start(ap, fmt);
+		len = vprintk(fmt, ap);
+		va_end(ap);
+	}
+	if (!ve_is_super(get_exec_env()) && (dst & VE_LOG)) {
+		va_start(ap, fmt);
+		len = __vprintk(fmt, ap);
+		va_end(ap);
+	}
+	return len;
+}
+EXPORT_SYMBOL(ve_printk);
+
+
 #else
 
 asmlinkage long sys_syslog(int type, char __user *buf, int len)
@@ -747,6 +865,12 @@ int is_console_locked(void)
 }
 EXPORT_SYMBOL(is_console_locked);
 
+void wake_up_klogd(void)
+{
+	if (!oops_in_progress && waitqueue_active(&log_wait))
+		wake_up_interruptible(&log_wait);
+}
+
 /**
  * release_console_sem - unlock the console system
  *
@@ -761,30 +885,39 @@ EXPORT_SYMBOL(is_console_locked);
  *
  * release_console_sem() may be called from any context.
  */
-void release_console_sem(void)
+static unsigned long do_release_console_sem(unsigned long *flags)
 {
-	unsigned long flags;
 	unsigned long _con_start, _log_end;
 	unsigned long wake_klogd = 0;
 
 	for ( ; ; ) {
-		spin_lock_irqsave(&logbuf_lock, flags);
 		wake_klogd |= log_start - log_end;
 		if (con_start == log_end)
 			break;			/* Nothing to print */
 		_con_start = con_start;
 		_log_end = log_end;
 		con_start = log_end;		/* Flush */
-		spin_unlock(&logbuf_lock);
+		spin_unlock_irqrestore(&logbuf_lock, *flags);
 		call_console_drivers(_con_start, _log_end);
-		local_irq_restore(flags);
+		spin_lock_irqsave(&logbuf_lock, *flags);
 	}
+
+	return wake_klogd;
+}
+
+void release_console_sem(void)
+{
+	unsigned long flags;
+	unsigned long wake_klogd;
+
+	spin_lock_irqsave(&logbuf_lock, flags);
+	wake_klogd = do_release_console_sem(&flags);
 	console_locked = 0;
 	console_may_schedule = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
-	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
-		wake_up_interruptible(&log_wait);
+	if (wake_klogd)
+		wake_up_klogd();
 }
 EXPORT_SYMBOL(release_console_sem);
 
@@ -1064,3 +1197,33 @@ int printk_ratelimit(void)
 				printk_ratelimit_burst);
 }
 EXPORT_SYMBOL(printk_ratelimit);
+
+/*
+ * Rate limiter: returns 1 when the event may go ahead, 0 when suppressed.
+ */
+int vz_ratelimit(struct vz_rate_info *p)
+{
+	unsigned long cjif, djif;
+	unsigned long flags;
+	static DEFINE_SPINLOCK(ratelimit_lock);
+	long new_bucket;
+
+	spin_lock_irqsave(&ratelimit_lock, flags);
+	cjif = jiffies;
+	djif = cjif - p->last;
+	if (djif < p->interval) {
+		if (p->bucket >= p->burst) {
+			spin_unlock_irqrestore(&ratelimit_lock, flags);
+			return 0;
+		}
+		p->bucket++;
+	} else {
+		/* avoid dividing by a zero interval: drain the bucket */
+		new_bucket = p->interval ?
+			p->bucket - (djif / (unsigned)p->interval) : 0;
+		p->bucket = (new_bucket < 0 ? 0 : new_bucket) + 1;
+	}
+	p->last = cjif;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return 1;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/profile.c linux-2.6.16.46-0.12-027test011/kernel/profile.c
--- linux-2.6.16.46-0.12.orig/kernel/profile.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/kernel/profile.c	2007-08-28 17:35:36.000000000 +0400
@@ -91,6 +91,8 @@ static DEFINE_RWLOCK(handoff_lock);
 static struct notifier_block * task_exit_notifier;
 static struct notifier_block * task_free_notifier;
 static struct notifier_block * munmap_notifier;
+
+EXPORT_SYMBOL(task_exit_notifier);
  
 void profile_task_exit(struct task_struct * task)
 {
diff -upr linux-2.6.16.46-0.12.orig/kernel/ptrace.c linux-2.6.16.46-0.12-027test011/kernel/ptrace.c
--- linux-2.6.16.46-0.12.orig/kernel/ptrace.c	2007-08-24 19:28:06.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ptrace.c	2007-08-28 17:35:33.000000000 +0400
@@ -133,7 +133,10 @@ static int may_attach(struct task_struct
 	smp_rmb();
 	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
 		return -EPERM;
-
+	if ((task->mm->vps_dumpable != 1) && !ve_is_super(get_exec_env()))
+		return -EPERM;
+	if (!ve_accessible(VE_TASK_INFO(task)->owner_env, get_exec_env()))
+		return -EPERM;
 	return security_ptrace(current, task);
 }
 
@@ -177,6 +180,8 @@ repeat:
 		goto repeat;
 	}
 
+	if (task->mm->vps_dumpable == 2)
+		goto bad;
 	/* the same process cannot be attached many times */
 	if (task->ptrace & PT_PTRACED)
 		goto bad;
@@ -280,6 +285,7 @@ int access_process_vm(struct task_struct
 	
 	return buf - old_buf;
 }
+EXPORT_SYMBOL_GPL(access_process_vm);
 
 int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
 {
@@ -477,7 +483,7 @@ struct task_struct *ptrace_get_task_stru
 		return ERR_PTR(-EPERM);
 
 	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
+	child = find_task_by_pid_ve(pid);
 	if (child)
 		get_task_struct(child);
 	read_unlock(&tasklist_lock);
diff -upr linux-2.6.16.46-0.12.orig/kernel/sched.c linux-2.6.16.46-0.12-027test011/kernel/sched.c
--- linux-2.6.16.46-0.12.orig/kernel/sched.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/sched.c	2007-08-28 17:35:36.000000000 +0400
@@ -49,6 +49,8 @@
 #include <linux/syscalls.h>
 #include <linux/times.h>
 #include <linux/tsacct_kern.h>
+#include <linux/vsched.h>
+#include <linux/fairsched.h>
 #include <linux/delayacct.h>
 #include <linux/kprobes.h>
 #include <asm/tlb.h>
@@ -144,7 +146,7 @@ EXPORT_SYMBOL_GPL(dump_polling_oncpu);
 #ifdef CONFIG_SMP
 #define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
 		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
-			num_online_cpus())
+			vsched_num_online_vcpus(task_vsched(p)))
 #else
 #define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
 		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
@@ -188,7 +190,56 @@ static unsigned int task_timeslice(task_
 #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)	\
 				< (long long) (sd)->cache_hot_time)
 
-DEFINE_PER_CPU(struct runqueue, runqueues);
+/* VCPU scheduler state description */
+struct vcpu_struct;
+struct vcpu_scheduler {
+	struct list_head idle_list;
+	struct list_head active_list;
+	struct list_head running_list;
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node;
+#endif
+	struct list_head list;
+	struct vcpu_struct *vcpu[NR_CPUS];
+	int id;
+	cpumask_t vcpu_online_map, vcpu_running_map;
+	cpumask_t pcpu_running_map;
+	int num_online_vcpus;
+} ____cacheline_internodealigned_in_smp;
+
+/* virtual CPU description */
+struct vcpu_struct {
+	struct runqueue rq;
+#ifdef CONFIG_SCHED_VCPU
+	unsigned active : 1,
+		 running : 1;
+	struct list_head list;
+	struct vcpu_scheduler *vsched;
+	int last_pcpu;
+	unsigned long start_time;
+	unsigned long stop_time;
+#endif
+	int id;
+} ____cacheline_internodealigned_in_smp;
+
+/* physical CPU description */
+struct pcpu_info {
+	struct vcpu_scheduler *vsched;
+	struct vcpu_struct *vcpu;
+	task_t *idle;
+#ifdef CONFIG_SMP
+	struct sched_domain *sd;
+#endif
+	int id;
+} ____cacheline_internodealigned_in_smp;
+
+struct pcpu_info pcpu_info[NR_CPUS];
+
+static LIST_HEAD(vsched_list);
+static DEFINE_SPINLOCK(vsched_list_lock);
+
+#define pcpu(nr)		(&pcpu_info[nr])
+#define this_pcpu()		(pcpu(smp_processor_id()))
 
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
@@ -197,13 +248,589 @@ DEFINE_PER_CPU(struct runqueue, runqueue
  * The domain tree of any CPU may only be accessed from within
  * preempt-disabled sections.
  */
+#define for_each_pdomain(sd, domain) \
+for (domain = rcu_dereference(sd); domain; domain = domain->parent)
+
 #define for_each_domain(cpu, domain) \
-for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent)
+	for_each_pdomain(vcpu_rq(cpu)->sd, domain)
+
+#ifdef CONFIG_SCHED_VCPU
+
+/* Used in find_idle_vsched() */
+static DEFINE_PER_CPU(int, find_busvs_last_pcpu);
+
+/*
+ * vcpu_timeslice - how many milliseconds a runnable VCPU stays on the same
+ * physical CPU. If vcpu_timeslice < 0, the actual VCPU timeslice value is
+ * calculated from the number of 'ready to run' VCPUs:
+ *
+ * vcpu_timeslice_actual = VCPU_TIMESLICE_MAX >>
+ *			((nr_runnable_vcpus - 1) / nr_pcpus)
+ */
+#define VCPU_TIMESLICE_MAX	8
+int vcpu_timeslice_actual;
+unsigned int nr_online_pcpus = 1;	/* mustn't be 0, cause it's divisor */
+/*
+ * Set initial value to -1, to not subtract '-1' each time.
+ */
+unsigned int nr_runnable_vcpus = -1;
+
+u32 vcpu_sched_timeslice = 5;
+int vcpu_timeslice = -1;
+u32 vcpu_hot_timeslice = 4;	/* < 4 won't work for HZ=250 */
+EXPORT_SYMBOL(vcpu_sched_timeslice);
+EXPORT_SYMBOL(vcpu_timeslice);
+EXPORT_SYMBOL(vcpu_hot_timeslice);
+
+extern spinlock_t fairsched_lock;
+static struct vcpu_scheduler default_vsched, idle_vsched;
+static struct vcpu_struct boot_vcpu, boot_idle_vcpu;
+
+#define vsched_default_vsched()	(&default_vsched)
+#define vsched_default_vcpu(id)	(default_vsched.vcpu[id])
+
+/*
+ * All macros below may be used without locks when there are no
+ * strict ordering requirements, because we assume that:
+ *
+ * 1. A VCPU cannot disappear "on the fly" (FIXME)
+ *
+ * 2. p->vsched access is atomic.
+ */
+
+#define task_vsched(tsk)	((tsk)->vsched)
+#define this_vsched()		(task_vsched(current))
+
+#define vsched_vcpu(vsched, id)	((vsched)->vcpu[id])
+#define this_vcpu()		(task_vcpu(current))
+#define task_vcpu(p)		((p)->vcpu)
+
+#define vsched_id(vsched)	((vsched)->id)
+#define vsched_vcpu_online_map(vsched)	((vsched)->vcpu_online_map)
+#define vsched_num_online_vcpus(vsched)	((vsched)->num_online_vcpus)
+#define vsched_pcpu_running_map(vsched)	((vsched)->pcpu_running_map)
+
+#define vcpu_vsched(vcpu)	((vcpu)->vsched)
+#define vcpu_last_pcpu(vcpu)	((vcpu)->last_pcpu)
+#define vcpu_isset(vcpu, mask)	(cpu_isset((vcpu)->id, mask))
+#define vcpu_is_offline(vcpu)	(!vcpu_isset(vcpu, \
+					vcpu_vsched(vcpu)->vcpu_online_map))
+
+static int __add_vcpu(struct vcpu_scheduler *vsched, int id);
+
+#define vcpu_is_hot(vcpu)	(jiffies - (vcpu)->start_time \
+					< msecs_to_jiffies(vcpu_timeslice_actual))
+#else	/* CONFIG_SCHED_VCPU */
+
+static DEFINE_PER_CPU(struct vcpu_struct, vcpu_struct);
+
+#define task_vsched(p)		NULL
+#define this_vcpu()		(task_vcpu(current))
+#define task_vcpu(p)		(vcpu(task_cpu(p)))
+
+#define vsched_vcpu(sched, id)	(vcpu(id))
+#define vsched_id(vsched)	0
+#define vsched_default_vsched()	NULL
+#define vsched_default_vcpu(id)	(vcpu(id))
+
+#define vsched_vcpu_online_map(vsched)	(cpu_online_map)
+#define vsched_num_online_vcpus(vsched)	(num_online_cpus())
+#define vsched_pcpu_running_map(vsched)	(cpu_online_map)
+
+#define vcpu(id)		(&per_cpu(vcpu_struct, id))
+
+#define vcpu_vsched(vcpu)	NULL
+#define vcpu_last_pcpu(vcpu)	((vcpu)->id)
+#define vcpu_isset(vcpu, mask)	(cpu_isset((vcpu)->id, mask))
+#define vcpu_is_offline(vcpu)	(cpu_is_offline((vcpu)->id))
+
+#define vcpu_is_hot(vcpu)	(1)
+#endif	/* CONFIG_SCHED_VCPU */
+
+#define this_rq()		(vcpu_rq(this_vcpu()))
+#define task_rq(p)		(vcpu_rq(task_vcpu(p)))
+#define vcpu_rq(vcpu)		(&(vcpu)->rq)
+#define get_vcpu()		({ preempt_disable(); this_vcpu(); })
+#define put_vcpu()		({ put_cpu(); })
+#define rq_vcpu(__rq)		(container_of((__rq), struct vcpu_struct, rq))
+
+/**
+ * idle_task - return the idle task for a given cpu.
+ * @cpu: the processor in question.
+ */
+task_t *idle_task(int cpu)
+{
+	return pcpu(cpu)->idle;
+}
+
+#ifdef CONFIG_SMP
+static inline void update_rq_cpu_load(runqueue_t *this_rq)
+{
+	unsigned long old_load, this_load;
+	int i;
+
+	if (unlikely(this_rq->nr_running == 0)) {
+		for (i = 0; i < 3; i++)
+			this_rq->cpu_load[i] = 0;
+		return;
+	}
+
+	this_load = this_rq->nr_running * SCHED_LOAD_SCALE;
+	for (i = 0; i < 3; i++) {
+		unsigned long new_load = this_load;
+		int scale = 1 << i;
+		old_load = this_rq->cpu_load[i];
+		/*
+		 * Round up the averaging division if load is increasing. This
+		 * prevents us from getting stuck on 9 if the load is 10, for
+		 * example.
+		 */
+		if (new_load > old_load)
+			new_load += scale-1;
+		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale;
+	}
+}
+#else	/* CONFIG_SMP */
+static inline void update_rq_cpu_load(runqueue_t *this_rq)
+{
+}
+#endif	/* CONFIG_SMP */
+
+#ifdef CONFIG_SCHED_VCPU
+/* Recompute vcpu_timeslice_actual from vcpu_timeslice or from the VCPU load. */
+static inline void recalc_vcpu_timeslice(void)
+{
+	unsigned int shift;
+	int val;
+
+	if (vcpu_timeslice < 0) {
+		/* stay unsigned: nr_runnable_vcpus starts at (unsigned)-1 */
+		shift = nr_runnable_vcpus / nr_online_pcpus;
+		val = VCPU_TIMESLICE_MAX >> (shift > 31 ? 31 : shift);
+	} else {
+		val = vcpu_timeslice;
+	}
+
+	/* Skip the store when unchanged so other CPUs keep their cacheline. */
+	if (vcpu_timeslice_actual != val)
+		vcpu_timeslice_actual = val;
+}
+
+void fastcall vsched_cpu_online_map(struct vcpu_scheduler *vsched,
+		cpumask_t *mask)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&fairsched_lock, flags);
+	*mask = vsched->vcpu_online_map;
+	spin_unlock_irqrestore(&fairsched_lock, flags);
+}
+
+static inline void set_task_vsched(task_t *p, struct vcpu_scheduler *vsched)
+{
+	/* NOTE: set_task_cpu() is required after every set_task_vsched()! */
+	p->vsched = vsched;
+	p->vsched_id = vsched_id(vsched);
+}
+
+inline void set_task_cpu(struct task_struct *p, unsigned int vcpu_id)
+{
+	p->vcpu = vsched_vcpu(task_vsched(p), vcpu_id);
+	p->vcpu_id = vcpu_id;
+}
+
+static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu)
+{
+	p->vcpu = vcpu;
+	p->vcpu_id = vcpu->id;
+}
+
+/* this is called when rq->nr_running changes from 0 to 1 */
+static void vcpu_attach(runqueue_t *rq)
+{
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+
+	vcpu = rq_vcpu(rq);
+	vsched = vcpu_vsched(vcpu);
+
+	BUG_ON(vcpu->active);
+	spin_lock(&fairsched_lock);
+	vcpu->active = 1;
+	if (!vcpu->running)
+		list_move_tail(&vcpu->list, &vsched->active_list);
+
+	fairsched_incrun(vsched->node);
+	nr_runnable_vcpus++;
+	spin_unlock(&fairsched_lock);
+
+	recalc_vcpu_timeslice();
+}
+
+/* this is called when rq->nr_running changes from 1 to 0 */
+static void vcpu_detach(runqueue_t *rq)
+{
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+
+	vcpu = rq_vcpu(rq);
+	vsched = vcpu_vsched(vcpu);
+	BUG_ON(!vcpu->active);
+
+	spin_lock(&fairsched_lock);
+	fairsched_decrun(vsched->node);
+
+	vcpu->active = 0;
+	if (!vcpu->running)
+		list_move_tail(&vcpu->list, &vsched->idle_list);
+	nr_runnable_vcpus--;
+	spin_unlock(&fairsched_lock);
+
+	recalc_vcpu_timeslice();
+}
+
+static inline void __vcpu_get(vcpu_t vcpu)
+{
+	struct pcpu_info *pcpu;
+	struct vcpu_scheduler *vsched;
+
+	BUG_ON(!this_vcpu()->running);
+
+	pcpu = this_pcpu();
+	vsched = vcpu_vsched(vcpu);
+
+	pcpu->vcpu = vcpu;
+	pcpu->vsched = vsched;
+
+	fairsched_inccpu(vsched->node);
+
+	list_move_tail(&vcpu->list, &vsched->running_list);
+	vcpu->start_time = jiffies;
+	vcpu->last_pcpu = pcpu->id;
+	vcpu->running = 1;
+	__set_bit(vcpu->id, vsched->vcpu_running_map.bits);
+	__set_bit(pcpu->id, vsched->pcpu_running_map.bits);
+#ifdef CONFIG_SMP
+	vcpu_rq(vcpu)->sd = pcpu->sd;
+#endif
+}
+
+static void vcpu_put(vcpu_t vcpu)
+{
+	struct vcpu_scheduler *vsched;
+	struct pcpu_info *cur_pcpu;
+	runqueue_t *rq;
+
+	vsched = vcpu_vsched(vcpu);
+	rq = vcpu_rq(vcpu);
+	cur_pcpu = this_pcpu();
+
+	BUG_ON(!vcpu->running);
+
+	spin_lock(&fairsched_lock);
+	vcpu->running = 0;
+	list_move_tail(&vcpu->list,
+		vcpu->active ? &vsched->active_list : &vsched->idle_list);
+	fairsched_deccpu(vsched->node);
+	__clear_bit(vcpu->id, vsched->vcpu_running_map.bits);
+	if (vsched != this_vsched())
+		__clear_bit(cur_pcpu->id, vsched->pcpu_running_map.bits);
+
+	vcpu->stop_time = jiffies;
+	if (!rq->nr_running)
+		rq->expired_timestamp = 0;
+	/* from this point task_running(prev_rq, prev) will be 0 */
+	rq->curr = cur_pcpu->idle;
+	update_rq_cpu_load(rq);
+	spin_unlock(&fairsched_lock);
+}
+
+/*
+ * Find an idle VCPU in the given vsched. A VCPU that last ran on this pcpu
+ * is preferred. The VCPU's last pcpu must also be set in the *cpus mask.
+ */
+static vcpu_t find_idle_vcpu(struct vcpu_scheduler *vsched, cpumask_t *cpus)
+{
+	vcpu_t vcpu;
+	vcpu_t best_vcpu;
+	int this_pcpu = smp_processor_id();
+
+	best_vcpu = NULL;
+
+	spin_lock(&fairsched_lock);
+	if (!list_empty(&vsched->idle_list)) {
+		list_for_each_entry(vcpu, &vsched->idle_list, list) {
+			if (unlikely(vcpu_is_offline(vcpu)))
+				continue;
+			if (!cpu_isset(vcpu_last_pcpu(vcpu), *cpus))
+				continue;
+			best_vcpu = vcpu;
+			if (vcpu_last_pcpu(vcpu) == this_pcpu)
+				break;
+		}
+	}
+	spin_unlock(&fairsched_lock);
+	return best_vcpu;
+}
+
+/*
+ * find_busiest_vsched - find a busy vsched among the running vsched's.
+ * An active vsched will be balanced when it becomes running.
+ * Returns NULL when no PCPU in @cpus runs a VCPU with more than one task.
+ * This routine must be simple and fast.
+ */
+static inline struct vcpu_scheduler *find_busiest_vsched(cpumask_t *cpus)
+{
+	vcpu_t vcpu;
+	int i, n;
+	cpumask_t mask, tmp_mask;
+	int step;
+
+	step = 0;
+
+	cpus_and(mask, *cpus, cpu_online_map);
+
+	/*
+	 * We implement a simple round-robin strategy to choose the
+	 * PCPU id to start from. The last PCPU number is saved in
+	 * per_cpu(find_busvs_last_pcpu).
+	 *
+	 * Assume the mask is 0x6789abcd and it's time to start
+	 * from PCPU #13:
+	 *
+	 * 1) In the first pass we must use mask 0x6789a000:
+	 *
+	 *    ((0x6789abcd >> 13) << 13) => 0x6789a000
+	 *
+	 * 2) In the second pass we must use mask 0x00000bcd:
+	 *
+	 *      0x6789abcd ^ 0x6789a000  => 0x00000bcd
+	 */
+	n = per_cpu(find_busvs_last_pcpu, raw_smp_processor_id());
+
+	cpus_shift_right(tmp_mask, mask, n);
+	cpus_shift_left(tmp_mask, tmp_mask, n);
+restart:
+	for_each_cpu_mask(i, tmp_mask) {
+		vcpu = pcpu(i)->vcpu;
+		if (vcpu_is_offline(vcpu))
+			continue;
+		if (vcpu->vsched == &idle_vsched)
+			continue;
+		if (vcpu == this_vcpu())
+			continue;
+
+		/*
+		 * 'Busiest' means at least 2 tasks on this VCPU's runqueue.
+		 */
+		if (vcpu->rq.nr_running > 1) {
+			per_cpu(find_busvs_last_pcpu, raw_smp_processor_id())
+				= ++n % NR_CPUS;
+			return vcpu->vsched;
+		}
+	}
+	if (!step++) {
+		/* Second pass: the PCPUs below the starting point */
+		cpus_xor(tmp_mask, mask, tmp_mask);
+		goto restart;
+	}
+	return NULL;
+}
+
+/*
+ * Pick an idle VCPU, allowed by @cpus, in the busiest running vsched.
+ * Returns NULL when no vsched is busy or it has no suitable idle VCPU.
+ */
+static inline vcpu_t find_idle_target(cpumask_t *cpus)
+{
+	struct vcpu_scheduler *busiest;
+
+	/*
+	 * Only a vsched reported by find_busiest_vsched() is worth
+	 * balancing into; without one there is no target at all.
+	 */
+	busiest = find_busiest_vsched(cpus);
+	if (!busiest)
+		return NULL;
+
+	/*
+	 * find_idle_vcpu() already yields NULL when nothing fits and
+	 * favours a VCPU that last ran on this PCPU, so its result
+	 * can be handed back to the caller unchanged, whether it is
+	 * a VCPU or NULL.
+	 */
+	return find_idle_vcpu(busiest, cpus);
+}
+
+static int idle_balance(vcpu_t this_cpu, runqueue_t *this_rq);
+
+static vcpu_t schedule_vcpu(vcpu_t cur_vcpu, cycles_t cycles)
+{
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu, best_vcpu;
+	unsigned long time;
+	runqueue_t *rq;
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node, *nodec;
+
+	nodec = vcpu_vsched(cur_vcpu)->node;
+	node = nodec;
+#endif
+
+	BUG_ON(!cur_vcpu->running);
+restart:
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		goto affine;
+
+	spin_lock(&fairsched_lock);
+#ifdef CONFIG_FAIRSCHED
+	node = fairsched_schedule(node, nodec,
+			cur_vcpu->active,
+			cycles);
+	if (unlikely(node == NULL))
+		goto idle;
+
+	vsched = node->vsched;
+#else
+	vsched = &default_vsched;
+#endif
+	/* FIXME: optimize vcpu switching, maybe we do not need to call
+	   fairsched_schedule() at all if vcpu is still active and too
+	   little time have passed so far */
+	if (cur_vcpu->vsched == vsched && cur_vcpu->active &&
+	    jiffies - cur_vcpu->start_time < msecs_to_jiffies(vcpu_sched_timeslice)) {
+		vcpu = cur_vcpu;
+		goto done;
+	}
+
+	if (list_empty(&vsched->active_list)) {
+		/* nothing except for this cpu can be scheduled */
+		if (likely(cur_vcpu->vsched == vsched && cur_vcpu->active)) {
+			/*
+			 * Current vcpu is the one we need. We have not
+			 * put it yet, so it's not on the active_list.
+			 */
+			vcpu = cur_vcpu;
+			vcpu->start_time = jiffies;
+			goto done;
+		} else
+			goto none;
+	}
+
+	/*
+	 * Ok, we are going to choose new VCPU now.
+	 */
+	time = jiffies - msecs_to_jiffies(vcpu_hot_timeslice);
+	/*
+	 * First vcpu in the list is more preferable, because it has waited
+	 * for CPU longer than others. If all vcpu's are hot, use the oldest
+	 * one.
+	 */
+	best_vcpu = list_entry(vsched->active_list.next,
+						struct vcpu_struct, list);
+	list_for_each_entry(vcpu, &vsched->active_list, list) {
+		/* Skip hot VCPU's that were running on another CPU's */
+		if (vcpu->stop_time > time && 
+				vcpu_last_pcpu(vcpu) != raw_smp_processor_id())
+			continue;
+
+		best_vcpu = vcpu;
+		break;
+	}
+	vcpu = best_vcpu;
+
+	/* add it to running list */
+	__vcpu_get(vcpu);
+done:
+	spin_unlock(&fairsched_lock);
+
+	rq = vcpu_rq(vcpu);
+	if (unlikely(vcpu != cur_vcpu)) {
+		spin_unlock(&vcpu_rq(cur_vcpu)->lock);
+		spin_lock(&rq->lock);
+		if (unlikely(!rq->nr_running)) {
+			/* race with balancing? */
+			spin_unlock(&rq->lock);
+			vcpu_put(vcpu);
+			spin_lock(&vcpu_rq(cur_vcpu)->lock);
+			goto restart;
+		}
+	}
+	BUG_ON(!rq->nr_running);
+	return vcpu;
+
+none:
+#ifdef CONFIG_FAIRSCHED
+	spin_unlock(&fairsched_lock);
+
+	/* fairsched doesn't schedule more CPUs than we have active */
+	BUG_ON(1);
+#else
+	goto idle;
+#endif
+
+idle:
+	vcpu = task_vcpu(this_pcpu()->idle);
+	__vcpu_get(vcpu);
+	spin_unlock(&fairsched_lock);
+	spin_unlock(&vcpu_rq(cur_vcpu)->lock);
+
+	spin_lock(&vcpu_rq(vcpu)->lock);
+	return vcpu;
+
+affine:
+	vcpu = vsched_vcpu(&default_vsched, raw_smp_processor_id());
+	/* current VCPU busy, continue */
+	if (cur_vcpu == vcpu && vcpu->active)
+		return cur_vcpu;
+	/* current is idle and nothing to run, keep idle */
+	if (vcpu_vsched(cur_vcpu) == &idle_vsched && !vcpu->active)
+		return cur_vcpu;
+
+	/* need to switch to idle... */
+	if (cur_vcpu == vcpu) {
+		spin_lock(&fairsched_lock);
+		goto idle;
+	}
+
+	/* ... and from idle */
+	spin_lock(&fairsched_lock);
+	__vcpu_get(vcpu);
+	goto done;
+}
+
+int vcpu_online(int cpu)
+{
+	return cpu_isset(cpu, vsched_vcpu_online_map(this_vsched()));
+}
+#else /* CONFIG_SCHED_VCPU */
+
+#define set_task_vsched(task, vsched)		do { } while (0)
+
+static inline void vcpu_attach(runqueue_t *rq)
+{
+}
+
+static inline void vcpu_detach(runqueue_t *rq)
+{
+}
+
+static inline void vcpu_put(vcpu_t vcpu)
+{
+}
+
+static inline vcpu_t schedule_vcpu(vcpu_t prev_vcpu, cycles_t cycles)
+{
+	return prev_vcpu;
+}
+
+static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu)
+{
+	set_task_pcpu(p, vcpu->id);
+}
+
+#endif /* CONFIG_SCHED_VCPU */
 
-#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
-#define this_rq()		(&__get_cpu_var(runqueues))
-#define task_rq(p)		cpu_rq(task_cpu(p))
-#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(next)	do { } while (0)
@@ -212,6 +839,27 @@ for (domain = rcu_dereference(cpu_rq(cpu
 # define finish_arch_switch(prev)	do { } while (0)
 #endif
 
+struct kernel_stat_glob kstat_glob;
+spinlock_t kstat_glb_lock = SPIN_LOCK_UNLOCKED;
+EXPORT_SYMBOL(kstat_glob);
+EXPORT_SYMBOL(kstat_glb_lock);
+
+static inline void finish_vsched_switch(runqueue_t *rq, vcpu_t prev_vcpu)
+{
+	vcpu_t vcpu;
+
+	vcpu = rq_vcpu(rq);
+	if (prev_vcpu != vcpu) {
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+		local_irq_disable();
+		vcpu_put(prev_vcpu);
+		local_irq_enable();
+#else
+		vcpu_put(prev_vcpu);
+#endif
+	}
+}
+
 #ifndef __ARCH_WANT_UNLOCKED_CTXSW
 static inline int task_running(runqueue_t *rq, task_t *p)
 {
@@ -224,11 +872,15 @@ static inline void prepare_lock_switch(r
 
 static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
 {
+	vcpu_t prev_vcpu;
 #ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
 	rq->lock.owner = current;
 #endif
-	spin_unlock_irq(&rq->lock);
+	prev_vcpu = task_vcpu(prev);
+	spin_unlock(&rq->lock);
+	finish_vsched_switch(rq, prev_vcpu);
+	local_irq_enable();
 }
 
 #else /* __ARCH_WANT_UNLOCKED_CTXSW */
@@ -260,6 +912,8 @@ static inline void prepare_lock_switch(r
 
 static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
 {
+	/* vcpu_put() should be done before setting prev->oncpu = 0 */
+	finish_vsched_switch(rq, task_vcpu(prev));
 #ifdef CONFIG_SMP
 	/*
 	 * After ->oncpu is cleared, the task can be moved to a different CPU.
@@ -302,26 +956,234 @@ static inline void task_rq_unlock(runque
 	spin_unlock_irqrestore(&rq->lock, *flags);
 }
 
+#ifdef CONFIG_VE
+#define ve_nr_unint_inc(env, cpu)					\
+	do {								\
+		VE_CPU_STATS((env), (cpu))->nr_unint++;			\
+	} while(0)
+#define ve_nr_unint_dec(env, cpu)					\
+	do {								\
+		VE_CPU_STATS((env), (cpu))->nr_unint--;			\
+	} while(0)
+
+#define cycles_after(a, b)	((long long)(b) - (long long)(a) < 0)
+
+cycles_t __ve_sched_get_idle_time(struct ve_struct *ve, int cpu)
+{
+	struct ve_cpu_stats *ve_stat;
+	unsigned v;
+	cycles_t strt, ret, cycles;
+
+	ve_stat = VE_CPU_STATS(ve, cpu);
+	do {
+		v = read_seqcount_begin(&ve_stat->stat_lock);
+		ret = ve_stat->idle_time;
+		strt = ve_stat->strt_idle_time;
+		if (strt && nr_uninterruptible_ve(ve) == 0) {
+			cycles = get_cycles();
+			if (cycles_after(cycles, strt))
+				ret += cycles - strt;
+		}
+	} while (read_seqcount_retry(&ve_stat->stat_lock, v));
+	return ret;
+}
+EXPORT_SYMBOL(__ve_sched_get_idle_time);
+
+cycles_t ve_sched_get_iowait_time(int cpu)
+{
+	struct ve_struct *ve;
+	struct ve_cpu_stats *ve_stat;
+	unsigned v;
+	cycles_t strt, ret, cycles;
+	vcpu_t vcpu;
+
+	preempt_disable();
+	ret = 0;
+	vcpu = vsched_vcpu(this_vsched(), cpu);
+	if (!vcpu)
+		goto done;
+
+	ve = get_exec_env();
+	ve_stat = VE_CPU_STATS(ve, cpu);
+	do {
+		struct runqueue *rq;
+		rq = vcpu_rq(vcpu);
+		v = read_seqcount_begin(&ve_stat->stat_lock);
+		ret = ve_stat->iowait_time;
+		strt = ve_stat->strt_idle_time;
+		if (strt && atomic_read(&rq->nr_iowait) > 0) {
+			cycles = get_cycles();
+			if (cycles_after(cycles, strt))
+				ret += cycles - strt;
+		}
+	} while (read_seqcount_retry(&ve_stat->stat_lock, v));
+done:
+	preempt_enable();
+	return ret;
+}
+
+EXPORT_SYMBOL(ve_sched_get_iowait_time);
+
+static inline void ve_stop_idle(struct ve_struct *ve,
+		vcpu_t vcpu, cycles_t cycles)
+{
+	struct ve_cpu_stats *ve_stat;
+
+	ve_stat = VE_CPU_STATS(ve, vcpu->id);
+
+	write_seqcount_begin(&ve_stat->stat_lock);
+	if (ve_stat->strt_idle_time) {
+		if (cycles_after(cycles, ve_stat->strt_idle_time)) {
+			if (atomic_read(&vcpu_rq(vcpu)->nr_iowait) == 0)
+				ve_stat->idle_time += cycles -
+					ve_stat->strt_idle_time;
+			else
+				ve_stat->iowait_time += cycles -
+					ve_stat->strt_idle_time;
+		}
+		ve_stat->strt_idle_time = 0;
+	}
+	write_seqcount_end(&ve_stat->stat_lock);
+}
+
+static inline void ve_strt_idle(struct ve_struct *ve,
+		unsigned int cpu, cycles_t cycles)
+{
+	struct ve_cpu_stats *ve_stat;
+
+	ve_stat = VE_CPU_STATS(ve, cpu);
+
+	write_seqcount_begin(&ve_stat->stat_lock);
+	ve_stat->strt_idle_time = cycles;
+	write_seqcount_end(&ve_stat->stat_lock);
+}
+
+#define ve_nr_running_inc(env, cpu)		do {			\
+		VE_CPU_STATS((env), (cpu))->nr_running++;		\
+	} while (0)
+#define ve_nr_running_dec(env, cpu)		do {			\
+		VE_CPU_STATS((env), (cpu))->nr_running--;		\
+	} while (0)
+
+void ve_sched_attach(struct ve_struct *envid)
+{
+	struct task_struct *tsk;
+	unsigned int cpu;
+
+	tsk = current;
+	preempt_disable();
+	cpu = task_cpu(tsk);
+	ve_nr_running_dec(VE_TASK_INFO(tsk)->owner_env, cpu);
+	ve_nr_running_inc(envid, cpu);
+	preempt_enable();
+}
+EXPORT_SYMBOL(ve_sched_attach);
+
+static inline void write_wakeup_stamp(struct task_struct *p, cycles_t cyc)
+{
+	struct ve_task_info *ti;
+
+	ti = VE_TASK_INFO(p);
+	write_seqcount_begin(&ti->wakeup_lock);
+	ti->wakeup_stamp = cyc;
+	write_seqcount_end(&ti->wakeup_lock);
+}
+
+static inline void update_sched_lat(struct task_struct *t, cycles_t cycles)
+{
+	int cpu;
+	cycles_t ve_wstamp;
+
+	/* safe due to runqueue lock */
+	cpu = smp_processor_id();
+	ve_wstamp = t->ve_task_info.wakeup_stamp;
+
+	if (ve_wstamp && cycles > ve_wstamp) {
+		KSTAT_LAT_PCPU_ADD(&kstat_glob.sched_lat,
+				cpu, cycles - ve_wstamp);
+		KSTAT_LAT_PCPU_ADD(&t->ve_task_info.exec_env->sched_lat_ve,
+				cpu, cycles - ve_wstamp);
+	}
+}
+
+static inline void update_ve_task_info(task_t *prev, cycles_t cycles)
+{
+	if (prev != this_pcpu()->idle) {
+		VE_CPU_STATS(prev->ve_task_info.owner_env,
+				smp_processor_id())->used_time +=
+			cycles - prev->ve_task_info.sched_time;
+
+		prev->ve_task_info.sched_time = cycles;
+	}
+}
+#else /* CONFIG_VE */
+#define ve_nr_running_inc(env, cpu)		do { } while(0)
+#define ve_nr_running_dec(env, cpu)		do { } while(0)
+#define ve_nr_unint_inc(env, cpu)		do { } while(0)
+#define ve_nr_unint_dec(env, cpu)		do { } while(0)
+#define update_ve_task_info(prev, cycles)	do { } while (0)
+#define ve_stop_idle(ve, vcpu, cycles)		do { } while (0)
+#define ve_strt_idle(ve, cpu, cycles)		do { } while (0)
+#endif /* CONFIG_VE */
+
+struct task_nrs_struct {
+	long nr_running;
+	long nr_unint;
+	long nr_stopped;
+	long nr_sleeping;
+	atomic_t nr_iowait;
+	long long nr_switches;
+} ____cacheline_aligned_in_smp;
+
+static struct task_nrs_struct glob_task_nrs[NR_CPUS];
+#define nr_running_inc(cpu)	do { glob_task_nrs[cpu].nr_running++; } while (0)
+#define nr_running_dec(cpu)	do { glob_task_nrs[cpu].nr_running--; } while (0)
+#define nr_unint_inc(cpu)	do { glob_task_nrs[cpu].nr_unint++; } while (0)
+#define nr_unint_dec(cpu)	do { glob_task_nrs[cpu].nr_unint--; } while (0)
+#define nr_stopped_inc(cpu)	do { glob_task_nrs[cpu].nr_stopped++; } while (0)
+#define nr_stopped_dec(cpu)	do { glob_task_nrs[cpu].nr_stopped--; } while (0)
+#define nr_sleeping_inc(cpu)	do { glob_task_nrs[cpu].nr_sleeping++; } while (0)
+#define nr_sleeping_dec(cpu)	do { glob_task_nrs[cpu].nr_sleeping--; } while (0)
+#define nr_iowait_inc(cpu)	do {				\
+		atomic_inc(&glob_task_nrs[cpu].nr_iowait);	\
+	} while (0)
+#define nr_iowait_dec(cpu)	do {				\
+		atomic_dec(&glob_task_nrs[cpu].nr_iowait);	\
+	} while (0)
+
+
+unsigned long nr_zombie = 0;   /* protected by tasklist_lock */
+EXPORT_SYMBOL(nr_zombie);
+
+atomic_t nr_dead = ATOMIC_INIT(0);
+EXPORT_SYMBOL(nr_dead);
+
 #ifdef CONFIG_SCHEDSTATS
+
 /*
  * bump this up when changing the output format or the meaning of an existing
  * format, so that tools can adapt (or abort)
  */
 #define SCHEDSTAT_VERSION 12
 
-static int show_schedstat(struct seq_file *seq, void *v)
+static int show_schedstat_vsched(struct seq_file *seq,
+		struct vcpu_scheduler *vsched)
 {
 	int cpu;
 
-	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
-	seq_printf(seq, "timestamp %lu\n", jiffies);
-	for_each_online_cpu(cpu) {
-		runqueue_t *rq = cpu_rq(cpu);
+	seq_printf(seq, "vsched%d\n", vsched->id);
+
+	for_each_cpu_mask (cpu, vsched_vcpu_online_map(vsched)) {
+		vcpu_t vcpu;
+		runqueue_t *rq;
 #ifdef CONFIG_SMP
 		struct sched_domain *sd;
 		int dcnt = 0;
 #endif
 
+		vcpu = vsched_vcpu(vsched, cpu);
+		rq = vcpu_rq(vcpu);
+
 		/* runqueue-specific stats */
 		seq_printf(seq,
 		    "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
@@ -337,7 +1199,7 @@ static int show_schedstat(struct seq_fil
 #ifdef CONFIG_SMP
 		/* domain-specific stats */
 		preempt_disable();
-		for_each_domain(cpu, sd) {
+		for_each_domain(vcpu, sd) {
 			enum idle_type itype;
 			char mask_str[NR_CPUS];
 
@@ -367,6 +1229,20 @@ static int show_schedstat(struct seq_fil
 	return 0;
 }
 
+static int show_schedstat(struct seq_file *seq, void *v)
+{
+	struct vcpu_scheduler *vs;
+
+	/* file-wide header, emitted once ahead of the per-vsched sections */
+	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
+	seq_printf(seq, "timestamp %lu\n", jiffies);
+	spin_lock(&vsched_list_lock);	/* held across the whole list walk */
+	list_for_each_entry (vs, &vsched_list, list)
+		show_schedstat_vsched(seq, vs);
+	spin_unlock(&vsched_list_lock);
+	return 0;
+}
+
 static int schedstat_open(struct inode *inode, struct file *file)
 {
 	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
@@ -524,17 +1400,17 @@ static inline void sched_info_depart(tas
 static inline void
 __sched_info_switch(struct task_struct *prev, struct task_struct *next)
 {
-	struct runqueue *rq = task_rq(prev);
+	int cpu = smp_processor_id();	/* the idle task is looked up via pcpu(cpu) */
 
 	/*
 	 * prev now departs the cpu.  It's not interesting to record
 	 * stats about how efficient we were at scheduling the idle
 	 * process, however.
 	 */
-	if (prev != rq->idle)
+	if (prev != pcpu(cpu)->idle)	/* no departure stats for this cpu's idle task */
 		sched_info_depart(prev);
 
-	if (next != rq->idle)
+	if (next != pcpu(cpu)->idle)	/* likewise no arrival stats for it */
 		sched_info_arrive(next);
 }
 static inline void
@@ -638,10 +1514,25 @@ static int effective_prio(task_t *p)
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	prio_array_t *array = rq->active;
+	cycles_t cycles;	/* wakeup timestamp, assigned only under CONFIG_VE */
+#ifdef CONFIG_VE
+	struct ve_struct *ve;	/* owner_env of p */
+	/* NOTE(review): ve and cycles are set only under CONFIG_VE but used after #endif; confirm */
+	cycles = get_cycles();
+	write_wakeup_stamp(p, cycles);
+	p->ve_task_info.sleep_time += cycles;	/* pairs with "-= cycles" in deactivate_task() */
+	ve = VE_TASK_INFO(p)->owner_env;
+#endif
 	if (!rt_task(p) && unlikely(EXPIRED_STARVING(rq)))
 		array = rq->expired;
 	enqueue_task(p, array);
 	rq->nr_running++;
+	ve_nr_running_inc(ve, task_cpu(p));	/* per-VE counter, indexed by task_cpu(p) */
+	nr_running_inc(smp_processor_id());	/* global counter, indexed by the executing cpu */
+	if (rq->nr_running == 1) {	/* rq held no tasks before this enqueue */
+		ve_stop_idle(ve, task_vcpu(p), cycles);
+		vcpu_attach(rq);
+	}
 }
 
 /*
@@ -774,9 +1665,40 @@ static void activate_task(task_t *p, run
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
+	cycles_t cycles;	/* timestamp taken once, reused for sleep_time and idle accounting */
+#ifdef CONFIG_VE
+	unsigned int cpu, pcpu;
+	struct ve_struct *ve;
+	/* NOTE(review): cpu/pcpu/ve are CONFIG_VE-only but used after #endif; check !CONFIG_VE */
+	cycles = get_cycles();
+	cpu = task_cpu(p);		/* index passed to the per-VE ve_* helpers */
+	pcpu = smp_processor_id();	/* index into the glob_task_nrs counters */
+	ve = p->ve_task_info.owner_env;
+
+	p->ve_task_info.sleep_time -= cycles;	/* __activate_task() adds the wakeup stamp back */
+#endif
+	if (p->state == TASK_UNINTERRUPTIBLE) {
+		ve_nr_unint_inc(ve, cpu);
+		nr_unint_inc(pcpu);
+	}
+	if (p->state == TASK_INTERRUPTIBLE) {
+		rq->nr_sleeping++;
+		nr_sleeping_inc(pcpu);
+	}
+	if (p->state == TASK_STOPPED) {
+		rq->nr_stopped++;
+		nr_stopped_inc(pcpu);
+	}
+
+	ve_nr_running_dec(VE_TASK_INFO(p)->owner_env, cpu);
+	nr_running_dec(pcpu);
 	rq->nr_running--;
 	dequeue_task(p, p->array);
 	p->array = NULL;
+	if (rq->nr_running == 0) {	/* p was the last task counted on rq */
+		ve_strt_idle(ve, cpu, cycles);
+		vcpu_detach(rq);
+	}
 }
 
 /*
@@ -787,18 +1709,22 @@ static void deactivate_task(struct task_
  * the target CPU.
  */
 #ifdef CONFIG_SMP
+/* FIXME: need to add vsched arg */
 static void resched_task(task_t *p)
 {
 	int cpu;
 
+#if 0
+	/* FIXME: this fails due to idle rq->curr == idle */
 	assert_spin_locked(&task_rq(p)->lock);
+#endif
 
 	if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
 		return;
 
 	set_tsk_thread_flag(p, TIF_NEED_RESCHED);
 
-	cpu = task_cpu(p);
+	cpu = task_pcpu(p);
 	if (cpu == smp_processor_id())
 		return;
 
@@ -810,7 +1736,10 @@ static void resched_task(task_t *p)
 #else
 static inline void resched_task(task_t *p)
 {
+#if 0
+	/* FIXME: this fails due to idle rq->curr == idle */
 	assert_spin_locked(&task_rq(p)->lock);
+#endif	/* 0: runqueue lock assertion disabled, see FIXME */
 	set_tsk_need_resched(p);
 }
 #endif
@@ -821,15 +1750,41 @@ static inline void resched_task(task_t *
  */
 inline int task_curr(const task_t *p)
 {
-	return cpu_curr(task_cpu(p)) == p;
+	return task_rq(p)->curr == p;	/* is p the ->curr of its own runqueue? */
 }
 
-#ifdef CONFIG_SMP
+EXPORT_SYMBOL(task_curr);
+
+/**
+ * idle_cpu - tell whether the given cpu is idle right now
+ * @cpu: number of the processor to query.
+ */
+inline int idle_cpu(int cpu)
+{
+#ifndef CONFIG_SCHED_VCPU
+	return pcpu(cpu)->idle == vcpu_rq(pcpu(cpu)->vcpu)->curr;
+#else	/* CONFIG_SCHED_VCPU: idle means the pcpu currently points at idle_vsched */
+	return &idle_vsched == pcpu(cpu)->vsched;
+#endif
+}
+
+EXPORT_SYMBOL_GPL(idle_cpu);
+
+static inline int idle_vcpu(vcpu_t cpu)
+{
+#ifndef CONFIG_SCHED_VCPU
+	return idle_cpu(cpu->id);	/* defer to idle_cpu() on the vcpu's id */
+#else
+	return !cpu->active;		/* idle exactly when ->active is zero */
+#endif
+}
+
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
 typedef struct {
 	struct list_head list;
 
 	task_t *task;
-	int dest_cpu;
+	vcpu_t dest_cpu;	/* same type as migrate_task()'s dest_cpu argument */
 
 	struct completion done;
 } migration_req_t;
@@ -838,7 +1793,7 @@ typedef struct {
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
-static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
+static int migrate_task(task_t *p, vcpu_t dest_cpu, migration_req_t *req)
 {
 	runqueue_t *rq = task_rq(p);
 
@@ -846,8 +1801,13 @@ static int migrate_task(task_t *p, int d
 	 * If the task is not on a runqueue (and not running), then
 	 * it is sufficient to simply update the task's cpu field.
 	 */
+#ifdef CONFIG_SCHED_VCPU
+	BUG_ON(task_vsched(p) == &idle_vsched);
+	BUG_ON(vcpu_vsched(dest_cpu) == &idle_vsched);
+#endif
 	if (!p->array && !task_running(rq, p)) {
-		set_task_cpu(p, dest_cpu);
+		set_task_vsched(p, vcpu_vsched(dest_cpu));
+		set_task_vcpu(p, dest_cpu);
 		return 0;
 	}
 
@@ -887,6 +1847,7 @@ repeat:
 	}
 	task_rq_unlock(rq, &flags);
 }
+EXPORT_SYMBOL_GPL(wait_task_inactive);
 
 /***
  * kick_process - kick a running thread to enter/exit the kernel
@@ -906,21 +1867,26 @@ void kick_process(task_t *p)
 	int cpu;
 
 	preempt_disable();
-	cpu = task_cpu(p);
+	cpu = task_pcpu(p);
 	if ((cpu != smp_processor_id()) && task_curr(p))
+		/* FIXME: ??? think over */
+		/* should add something like get_pcpu(cpu)->vcpu->id == task_cpu(p),
+		   but with serialization of vcpu access... */
 		smp_send_reschedule(cpu);
 	preempt_enable();
 }
+#endif
 
+#ifdef CONFIG_SMP
 /*
  * Return a low guess at the load of a migration-source cpu.
  *
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static inline unsigned long source_load(int cpu, int type)
+static inline unsigned long source_load(vcpu_t cpu, int type)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	runqueue_t *rq = vcpu_rq(cpu);
 	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
 		return load_now;
@@ -931,9 +1897,9 @@ static inline unsigned long source_load(
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long target_load(int cpu, int type)
+static inline unsigned long target_load(vcpu_t cpu, int type)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	runqueue_t *rq = vcpu_rq(cpu);
 	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
 		return load_now;
@@ -946,33 +1912,35 @@ static inline unsigned long target_load(
  * domain.
  */
 static struct sched_group *
-find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, vcpu_t this_cpu)
 {
 	struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long min_load = ULONG_MAX, this_load = 0;
 	int load_idx = sd->forkexec_idx;
 	int imbalance = 100 + (sd->imbalance_pct-100)/2;
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	int this_pcpu;
 
+	vsched = vcpu_vsched(this_cpu);
+	this_pcpu = vcpu_last_pcpu(this_cpu);
 	do {
 		unsigned long load, avg_load;
 		int local_group;
 		int i;
 
-		/* Skip over this group if it has no CPUs allowed */
-		if (!cpus_intersects(group->cpumask, p->cpus_allowed))
-			goto nextgroup;
-
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpu_isset(this_pcpu, group->cpumask);
 
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
 
 		for_each_cpu_mask(i, group->cpumask) {
+			vcpu = pcpu(i)->vcpu;
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = source_load(i, load_idx);
+				load = source_load(vcpu, load_idx);
 			else
-				load = target_load(i, load_idx);
+				load = target_load(vcpu, load_idx);
 
 			avg_load += load;
 		}
@@ -987,7 +1955,6 @@ find_idlest_group(struct sched_domain *s
 			min_load = avg_load;
 			idlest = group;
 		}
-nextgroup:
 		group = group->next;
 	} while (group != sd->groups);
 
@@ -999,46 +1966,62 @@ nextgroup:
 /*
  * find_idlest_queue - find the idlest runqueue among the cpus in group.
  */
-static int
-find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+static vcpu_t
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, vcpu_t this_cpu)
 {
-	cpumask_t tmp;
 	unsigned long load, min_load = ULONG_MAX;
-	int idlest = -1;
+	cpumask_t vmask;	/* online vcpus of vsched that p->cpus_allowed permits */
+	struct vcpu_scheduler *vsched;
+	vcpu_t idlest = (vcpu_t)-1;	/* (vcpu_t)-1 is returned when no candidate qualifies */
+	vcpu_t vcpu;
 	int i;
 
-	/* Traverse only the allowed CPUs */
-	cpus_and(tmp, group->cpumask, p->cpus_allowed);
+	vsched = vcpu_vsched(this_cpu);
+	BUG_ON(vsched != task_vsched(p));	/* p must be in this_cpu's vsched */
 
-	for_each_cpu_mask(i, tmp) {
-		load = source_load(i, 0);
+	cpus_and(vmask, vsched_vcpu_online_map(vsched), p->cpus_allowed);
+	for_each_cpu_mask(i, vmask) {
+		vcpu = vsched_vcpu(vsched, i);
+		/* skip vcpus whose vcpu_last_pcpu() is outside group, or that are offline */
+		if (!cpu_isset(vcpu_last_pcpu(vcpu), group->cpumask))
+			continue;
+		if (vcpu_is_offline(vcpu))
+			continue;
 
-		if (load < min_load || (load == min_load && i == this_cpu)) {
+		load = source_load(vcpu, 0);
+
+		if (load < min_load || (load == min_load && vcpu == this_cpu)) {	/* ties go to this_cpu */
 			min_load = load;
-			idlest = i;
+			idlest = vcpu;
 		}
 	}
 
 	return idlest;
 }
 
-static int
-find_idlest_cpu_nodomain(struct task_struct *p, int this_cpu)
+static vcpu_t
+find_idlest_cpu_nodomain(struct task_struct *p, vcpu_t this_cpu)
 {
 	cpumask_t tmp;
 	unsigned long load, min_load = ULONG_MAX;
-	int idlest = -1;
+	struct vcpu_scheduler *vsched;
+	vcpu_t idlest = (vcpu_t)-1;
+	vcpu_t vcpu;
 	int i;
 
+	vsched = vcpu_vsched(this_cpu);
+	BUG_ON(vsched != task_vsched(p));
+
 	/* Traverse only the allowed CPUs */
-	cpus_and(tmp, cpu_online_map, p->cpus_allowed);
+	cpus_and(tmp, vsched_vcpu_online_map(vsched), p->cpus_allowed);
 
 	for_each_cpu_mask(i, tmp) {
-		load = target_load(i, 1);
+		vcpu = vsched_vcpu(vsched, i);
+		load = target_load(vcpu, 1);
 
 		if (load < min_load) {
 			min_load = load;
-			idlest = i;
+			idlest = vcpu;
 		}
 	}
 	return idlest;
@@ -1058,7 +2041,7 @@ find_idlest_cpu_nodomain(struct task_str
 
 int affinity_load_balancing = 0;
 
-static int sched_balance_self(int cpu, int flag)
+static vcpu_t sched_balance_self(vcpu_t cpu, int flag)
 {
 	struct task_struct *t = current;
 	struct sched_domain *tmp, *sd = NULL;
@@ -1073,7 +2056,7 @@ static int sched_balance_self(int cpu, i
 	while (sd) {
 		cpumask_t span;
 		struct sched_group *group;
-		int new_cpu;
+		vcpu_t new_cpu;
 		int weight;
 
 		span = sd->span;
@@ -1082,7 +2065,7 @@ static int sched_balance_self(int cpu, i
 			goto nextlevel;
 
 		new_cpu = find_idlest_cpu(group, t, cpu);
-		if (new_cpu == -1 || new_cpu == cpu)
+		if (new_cpu == (vcpu_t)(-1) || new_cpu == cpu)
 			goto nextlevel;
 
 		/* Now try balancing at a lower domain level */
@@ -1113,21 +2096,27 @@ nextlevel:
  * Returns the CPU we should wake onto.
  */
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-static int wake_idle(int cpu, task_t *p)
+static vcpu_t wake_idle(vcpu_t cpu, task_t *p)
 {
-	cpumask_t tmp;
+	cpumask_t vtmp;
 	struct sched_domain *sd;
+	struct vcpu_scheduler *vsched;
 	int i;
 
-	if (idle_cpu(cpu))
+	if (idle_vcpu(cpu))
 		return cpu;
 
+	vsched = vcpu_vsched(cpu);
+	cpus_and(vtmp, vsched_vcpu_online_map(vsched), p->cpus_allowed);
 	for_each_domain(cpu, sd) {
 		if (sd->flags & SD_WAKE_IDLE) {
-			cpus_and(tmp, sd->span, p->cpus_allowed);
-			for_each_cpu_mask(i, tmp) {
-				if (idle_cpu(i))
-					return i;
+			for_each_cpu_mask(i, vtmp) {
+				vcpu_t vcpu;
+				vcpu = vsched_vcpu(vsched, i);
+				if (!cpu_isset(vcpu_last_pcpu(vcpu), sd->span))
+					continue;
+				if (idle_vcpu(vcpu))
+					return vcpu;
 			}
 		}
 		else
@@ -1136,7 +2125,7 @@ static int wake_idle(int cpu, task_t *p)
 	return cpu;
 }
 #else
-static inline int wake_idle(int cpu, task_t *p)
+static inline vcpu_t wake_idle(vcpu_t cpu, task_t *p)
 {
 	return cpu;
 }
@@ -1158,15 +2147,17 @@ static inline int wake_idle(int cpu, tas
  */
 static int try_to_wake_up(task_t *p, unsigned int state, int sync)
 {
-	int cpu, this_cpu, success = 0;
+	vcpu_t cpu, this_cpu;
+	int success = 0;
 	unsigned long flags;
 	long old_state;
 	runqueue_t *rq;
 #ifdef CONFIG_SMP
 	unsigned long load, this_load;
 	struct sched_domain *sd, *this_sd = NULL;
-	int new_cpu;
+	vcpu_t new_cpu;
 #endif
+	cpu = NULL;
 
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
@@ -1176,8 +2167,8 @@ static int try_to_wake_up(task_t *p, uns
 	if (p->array)
 		goto out_running;
 
-	cpu = task_cpu(p);
-	this_cpu = smp_processor_id();
+	cpu = task_vcpu(p);
+	this_cpu = this_vcpu();
 
 #ifdef CONFIG_SMP
 	if (unlikely(task_running(rq, p)))
@@ -1186,20 +2177,25 @@ static int try_to_wake_up(task_t *p, uns
 	new_cpu = cpu;
 
 	schedstat_inc(rq, ttwu_cnt);
+	/* FIXME: add vsched->last_vcpu array to optimize wakeups in different vsched */
+	if (vcpu_vsched(cpu) != vcpu_vsched(this_cpu))
+		goto out_set_cpu;
 	if (cpu == this_cpu) {
 		schedstat_inc(rq, ttwu_local);
 		goto out_set_cpu;
 	}
 
 	for_each_domain(this_cpu, sd) {
-		if (cpu_isset(cpu, sd->span)) {
+		if (cpu_isset(vcpu_last_pcpu(cpu), sd->span)) {
 			schedstat_inc(sd, ttwu_wake_remote);
 			this_sd = sd;
 			break;
 		}
 	}
 
-	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
+	if (unlikely(!vcpu_isset(this_cpu, p->cpus_allowed)))
+		goto out_set_cpu;
+	if (vcpu_is_offline(this_cpu))
 		goto out_set_cpu;
 
 	/*
@@ -1255,7 +2251,7 @@ static int try_to_wake_up(task_t *p, uns
 out_set_cpu:
 	new_cpu = wake_idle(new_cpu, p);
 	if (new_cpu != cpu) {
-		set_task_cpu(p, new_cpu);
+		set_task_vcpu(p, new_cpu);
 		task_rq_unlock(rq, &flags);
 		/* might preempt at this point */
 		rq = task_rq_lock(p, &flags);
@@ -1265,13 +2261,21 @@ out_set_cpu:
 		if (p->array)
 			goto out_running;
 
-		this_cpu = smp_processor_id();
-		cpu = task_cpu(p);
+		this_cpu = this_vcpu();
+		cpu = task_vcpu(p);
 	}
 
 out_activate:
 #endif /* CONFIG_SMP */
-	if (old_state == TASK_UNINTERRUPTIBLE) {
+	if (old_state == TASK_INTERRUPTIBLE) {
+		nr_sleeping_dec(smp_processor_id());
+		rq->nr_sleeping--;
+	} else if (old_state == TASK_STOPPED) {
+		nr_stopped_dec(smp_processor_id());
+		rq->nr_stopped--;
+	} else if (old_state == TASK_UNINTERRUPTIBLE) {
+		nr_unint_dec(smp_processor_id());
+		ve_nr_unint_dec(p->ve_task_info.owner_env, task_cpu(p));
 		rq->nr_uninterruptible--;
 		/*
 		 * Tasks on involuntary sleep don't earn
@@ -1326,17 +2330,45 @@ int fastcall wake_up_state(task_t *p, un
 }
 
 /*
+ * The init task is forked by swapper, which lives in idle_vsched, yet init
+ * must run in default_vsched - so its vsched/fairsched is switched by hand.
+ */
+static void wake_up_init(task_t *p)
+{
+	unsigned long irq_flags;
+	runqueue_t *init_rq;
+
+	/* both the fairsched node and the vsched should be changed here */
+	set_task_vsched(p, &default_vsched);
+	set_task_cpu(p, raw_smp_processor_id());
+
+	/*
+	 * wake_up_new_task() cannot be used directly: it assumes
+	 * that the child sits in the same vsched as its parent
+	 */
+	p->state = TASK_RUNNING;
+	p->sleep_avg = 0;
+	p->prio = effective_prio(p);
+
+	init_rq = task_rq_lock(p, &irq_flags);
+	__activate_task(p, init_rq);
+	task_rq_unlock(init_rq, &irq_flags);
+}
+
+/*
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
  */
 void fastcall sched_fork(task_t *p, int clone_flags)
 {
-	int cpu = get_cpu();
+	vcpu_t cpu;
 
+	preempt_disable();
+	cpu = this_vcpu();
 #ifdef CONFIG_SMP
 	cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
 #endif
-	set_task_cpu(p, cpu);
+	set_task_vcpu(p, cpu);
 
 	/*
 	 * We mark the process as running here, but have not actually
@@ -1372,6 +2404,10 @@ void fastcall sched_fork(task_t *p, int 
 	p->first_time_slice = 1;
 	current->time_slice >>= 1;
 	p->timestamp = sched_clock();
+#ifdef CONFIG_VE
+	/*cosmetic: sleep till wakeup below*/
+	p->ve_task_info.sleep_time -= get_cycles();
+#endif
 	if (unlikely(!current->time_slice)) {
 		/*
 		 * This case is rare, it happens when the parent has only
@@ -1382,7 +2418,7 @@ void fastcall sched_fork(task_t *p, int 
 		scheduler_tick();
 	}
 	local_irq_enable();
-	put_cpu();
+	preempt_enable();
 }
 
 /*
@@ -1395,13 +2431,19 @@ void fastcall sched_fork(task_t *p, int 
 void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 {
 	unsigned long flags;
-	int this_cpu, cpu;
+	vcpu_t this_cpu, cpu;
 	runqueue_t *rq, *this_rq;
 
+	if (unlikely(p->pid == 1)) {
+		wake_up_init(p);
+		return;
+	}
+
 	rq = task_rq_lock(p, &flags);
 	BUG_ON(p->state != TASK_RUNNING);
-	this_cpu = smp_processor_id();
-	cpu = task_cpu(p);
+	BUG_ON(task_vsched(current) != task_vsched(p));
+	this_cpu = this_vcpu();
+	cpu = task_vcpu(p);
 
 	/*
 	 * We decrease the sleep average of forking parents
@@ -1429,6 +2471,9 @@ void fastcall wake_up_new_task(task_t *p
 				p->array = current->array;
 				p->array->nr_active++;
 				rq->nr_running++;
+				ve_nr_running_inc(VE_TASK_INFO(p)->owner_env,
+						task_cpu(p));
+				nr_running_inc(smp_processor_id());
 			}
 			set_need_resched();
 		} else
@@ -1442,7 +2487,7 @@ void fastcall wake_up_new_task(task_t *p
 		 */
 		this_rq = rq;
 	} else {
-		this_rq = cpu_rq(this_cpu);
+		this_rq = vcpu_rq(this_cpu);
 
 		/*
 		 * Not the local CPU - must adjust timestamp. This should
@@ -1485,7 +2530,7 @@ void fastcall sched_exit(task_t *p)
 	 * the sleep_avg of the parent as well.
 	 */
 	rq = task_rq_lock(p->parent, &flags);
-	if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) {
+	if (p->first_time_slice && task_vcpu(p) == task_vcpu(p->parent)) {
 		p->parent->time_slice += p->time_slice;
 		if (unlikely(p->parent->time_slice > task_timeslice(p)))
 			p->parent->time_slice = task_timeslice(p);
@@ -1552,6 +2597,7 @@ static inline void finish_task_switch(ru
 	prev_task_flags = prev->flags;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
+
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_task_flags & PF_DEAD)) {
@@ -1578,8 +2624,9 @@ asmlinkage void schedule_tail(task_t *pr
 	preempt_enable();
 #endif
 	if (current->set_child_tid)
-		put_user(current->pid, current->set_child_tid);
+		put_user(virt_pid(current), current->set_child_tid);
 }
+EXPORT_SYMBOL_GPL(schedule_tail);
 
 /*
  * context_switch - switch to the new MM and the new
@@ -1619,20 +2666,26 @@ task_t * context_switch(runqueue_t *rq, 
  */
 unsigned long nr_running(void)
 {
-	unsigned long i, sum = 0;
+	unsigned long i, sum;
 
+	sum = 0;
 	for_each_online_cpu(i)
-		sum += cpu_rq(i)->nr_running;
+		sum += glob_task_nrs[i].nr_running;	/* global counters, not runqueue fields */
+
+	if (unlikely((long)sum < 0))	/* a negative total is reported as 0 */
+		sum = 0;
 
 	return sum;
 }
+EXPORT_SYMBOL(nr_running);
 
 unsigned long nr_uninterruptible(void)
 {
-	unsigned long i, sum = 0;
+	unsigned long i, sum;
 
+	sum = 0;
 	for_each_cpu(i)
-		sum += cpu_rq(i)->nr_uninterruptible;
+		sum += glob_task_nrs[i].nr_unint;
 
 	/*
 	 * Since we read the counters lockless, it might be slightly
@@ -1644,22 +2697,34 @@ unsigned long nr_uninterruptible(void)
 	return sum;
 }
 
+EXPORT_SYMBOL(nr_uninterruptible);
+
 unsigned long long nr_context_switches(void)
 {
-	unsigned long long i, sum = 0;
+	unsigned long long i, sum;	/* sum: total of the per-cpu nr_switches counters */
 
+	sum = 0;
 	for_each_cpu(i)
-		sum += cpu_rq(i)->nr_switches;
+		sum += glob_task_nrs[i].nr_switches;
+	/* compare as long long: a (long) cast would truncate the sum on 32-bit */
+	if (unlikely((long long)sum < 0))
+		sum = 0;
 
 	return sum;
 }
 
+EXPORT_SYMBOL(nr_context_switches);
+
 unsigned long nr_iowait(void)
 {
-	unsigned long i, sum = 0;
+	unsigned long i, sum;
 
+	sum = 0;
 	for_each_cpu(i)
-		sum += atomic_read(&cpu_rq(i)->nr_iowait);
+		sum += atomic_read(&glob_task_nrs[i].nr_iowait);	/* global atomic counters */
+
+	if (unlikely((long)sum < 0))	/* a negative total is reported as 0 */
+		sum = 0;
 
 	return sum;
 }
@@ -1669,8 +2734,8 @@ unsigned long nr_active(void)
 	unsigned long i, running = 0, uninterruptible = 0;
 
 	for_each_online_cpu(i) {
-		running += cpu_rq(i)->nr_running;
-		uninterruptible += cpu_rq(i)->nr_uninterruptible;
+		running += glob_task_nrs[i].nr_running;
+		uninterruptible += glob_task_nrs[i].nr_unint;
 	}
 
 	if (unlikely((long)uninterruptible < 0))
@@ -1680,7 +2745,95 @@ unsigned long nr_active(void)
 }
 
 
-#ifdef CONFIG_SMP
+EXPORT_SYMBOL(nr_iowait);
+
+/* Sum the glob_task_nrs[].nr_stopped counters; a negative total reads as 0. */
+unsigned long nr_stopped(void)
+{
+	unsigned long cpu, total = 0;
+
+	for_each_cpu(cpu) {
+		total += glob_task_nrs[cpu].nr_stopped;
+	}
+	if (unlikely((long)total < 0))
+		return 0;
+
+	return total;
+}
+
+EXPORT_SYMBOL(nr_stopped);
+
+/* Sum the glob_task_nrs[].nr_sleeping counters; a negative total reads as 0. */
+unsigned long nr_sleeping(void)
+{
+	unsigned long cpu, total = 0;
+
+	for_each_cpu(cpu) {
+		total += glob_task_nrs[cpu].nr_sleeping;
+	}
+	if (unlikely((long)total < 0))
+		return 0;
+
+	return total;
+}
+
+EXPORT_SYMBOL(nr_sleeping);
+
+#ifdef CONFIG_VE
+/* Sum ve's per-cpu nr_running statistics over the online cpus; never negative. */
+unsigned long nr_running_ve(struct ve_struct *ve)
+{
+	long total = 0;
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		total += VE_CPU_STATS(ve, cpu)->nr_running;
+	return total < 0 ? 0UL : (unsigned long)total;
+}
+
+EXPORT_SYMBOL(nr_running_ve);
+
+/* Sum ve's per-cpu nr_unint statistics over the online cpus; never negative. */
+unsigned long nr_uninterruptible_ve(struct ve_struct *ve)
+{
+	long total = 0;
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		total += VE_CPU_STATS(ve, cpu)->nr_unint;
+	return total < 0 ? 0UL : (unsigned long)total;
+}
+
+EXPORT_SYMBOL(nr_uninterruptible_ve);
+
+/*
+ * Total nr_iowait over the online vcpu runqueues of this_vsched().
+ * Nothing is summed unless CONFIG_SCHED_VCPU is set; never negative.
+ */
+unsigned long nr_iowait_ve(void)
+{
+	long total = 0;
+#ifdef CONFIG_SCHED_VCPU
+	struct vcpu_scheduler *vs = this_vsched();
+	int n;
+
+	for_each_cpu_mask(n, vsched_vcpu_online_map(vs))
+		total += atomic_read(&vcpu_rq(vsched_vcpu(vs, n))->nr_iowait);
+#endif
+
+	return total < 0 ? 0UL : (unsigned long)total;
+}
+
+EXPORT_SYMBOL(nr_iowait_ve);
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU)
+
+#ifdef CONFIG_SCHED_VCPU
+#define rq_compare(rq1, rq2)	((rq1) < (rq2))	/* lock order by runqueue address */
+#else
+#define rq_compare(rq1, rq2)	((rq1)->cpu < (rq2)->cpu)	/* lock order by ->cpu */
+#endif
 
 /*
  * double_rq_lock - safely lock two runqueues
@@ -1695,11 +2848,12 @@ static void double_rq_lock(runqueue_t *r
 	__acquires(rq1->lock)
 	__acquires(rq2->lock)
 {
+	BUG_ON(!irqs_disabled());
 	if (rq1 == rq2) {
 		spin_lock(&rq1->lock);
 		__acquire(rq2->lock);	/* Fake it out ;) */
 	} else {
-		if (rq1->cpu < rq2->cpu) {
+		if (rq_compare(rq1, rq2)) {
 			spin_lock(&rq1->lock);
 			spin_lock(&rq2->lock);
 		} else {
@@ -1727,38 +2881,20 @@ static void double_rq_unlock(runqueue_t 
 }
 
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
-static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
-	__releases(this_rq->lock)
-	__acquires(busiest->lock)
-	__acquires(this_rq->lock)
-{
-	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest->cpu < this_rq->cpu) {
-			spin_unlock(&this_rq->lock);
-			spin_lock(&busiest->lock);
-			spin_lock(&this_rq->lock);
-		} else
-			spin_lock(&busiest->lock);
-	}
-}
-
-/*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
  * allow dest_cpu, which will force the cpu onto dest_cpu.  Then
  * the cpu_allowed mask is restored.
  */
-static void sched_migrate_task(task_t *p, int dest_cpu)
+static void sched_migrate_task(task_t *p, vcpu_t dest_cpu)
 {
 	migration_req_t req;
 	runqueue_t *rq;
 	unsigned long flags;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpu_isset(dest_cpu, p->cpus_allowed)
-	    || unlikely(cpu_is_offline(dest_cpu)))
+	if (unlikely(!vcpu_isset(dest_cpu, p->cpus_allowed)
+	    || vcpu_is_offline(dest_cpu)))
 		goto out;
 
 	/* force the process onto the specified CPU */
@@ -1775,6 +2911,26 @@ static void sched_migrate_task(task_t *p
 out:
 	task_rq_unlock(rq, &flags);
 }
+#endif
+
+#ifdef CONFIG_SMP
+/* double_lock_balance - lock busiest's runqueue; this_rq is already locked. */
+static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	if (likely(spin_trylock(&busiest->lock)))
+		return;
+
+	if (!rq_compare(busiest, this_rq)) {
+		spin_lock(&busiest->lock);
+		return;
+	}
+	spin_unlock(&this_rq->lock);	/* back off so both are taken in rq_compare() order */
+	spin_lock(&busiest->lock);
+	spin_lock(&this_rq->lock);
+}
 
 /*
  * sched_exec - execve() is a valuable balancing opportunity, because at
@@ -1782,9 +2938,12 @@ out:
  */
 void sched_exec(void)
 {
-	int new_cpu, this_cpu = get_cpu();
+	vcpu_t new_cpu, this_cpu;
+
+	preempt_disable();	/* no preemption while this_vcpu() is sampled and balanced */
+	this_cpu = this_vcpu();
 	new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC);
-	put_cpu();
+	preempt_enable();	/* the migration below runs with preemption enabled again */
 	if (new_cpu != this_cpu)
 		sched_migrate_task(current, new_cpu);
 }
@@ -1795,12 +2954,32 @@ void sched_exec(void)
  */
 static
 void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
-	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
+	       runqueue_t *this_rq, prio_array_t *this_array, vcpu_t this_cpu)
 {
+	cycles_t cycles;
+	int cpu;
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+
+	ve = VE_TASK_INFO(p)->owner_env;
+#endif
+	cycles = get_cycles();
+
 	dequeue_task(p, src_array);
 	src_rq->nr_running--;
-	set_task_cpu(p, this_cpu);
+	cpu = task_cpu(p);
+	ve_nr_running_dec(ve, cpu);
+	if (src_rq->nr_running == 0) {
+		ve_strt_idle(ve, cpu, cycles);
+		vcpu_detach(src_rq);
+	}
+	set_task_vcpu(p, this_cpu);
+	if (this_rq->nr_running == 0) {
+		ve_stop_idle(ve, this_cpu, cycles);
+		vcpu_attach(this_rq);
+	}
 	this_rq->nr_running++;
+	ve_nr_running_inc(ve, task_cpu(p));
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
@@ -1816,7 +2995,7 @@ void pull_task(runqueue_t *src_rq, prio_
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
 static
-int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
+int can_migrate_task(task_t *p, runqueue_t *rq, vcpu_t this_cpu,
 		     struct sched_domain *sd, enum idle_type idle,
 		     int *all_pinned)
 {
@@ -1826,7 +3005,7 @@ int can_migrate_task(task_t *p, runqueue
 	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
 	 * 3) are cache-hot on their current CPU.
 	 */
-	if (!cpu_isset(this_cpu, p->cpus_allowed))
+	if (!vcpu_isset(this_cpu, p->cpus_allowed))
 		return 0;
 	*all_pinned = 0;
 
@@ -1854,7 +3033,7 @@ int can_migrate_task(task_t *p, runqueue
  *
  * Called with both runqueues locked.
  */
-static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
+static int move_tasks(runqueue_t *this_rq, vcpu_t this_cpu, runqueue_t *busiest,
 		      unsigned long max_nr_move, struct sched_domain *sd,
 		      enum idle_type idle, int *all_pinned)
 {
@@ -1863,6 +3042,8 @@ static int move_tasks(runqueue_t *this_r
 	int idx, pulled = 0, pinned = 0;
 	task_t *tmp;
 
+	if (vcpu_is_offline(this_cpu))
+		goto out;
 	if (max_nr_move == 0)
 		goto out;
 
@@ -1947,7 +3128,7 @@ out:
  * moved to restore balance via the imbalance parameter.
  */
 static struct sched_group *
-find_busiest_group(struct sched_domain *sd, int this_cpu,
+find_busiest_group(struct sched_domain *sd, vcpu_t this_cpu,
 		   unsigned long *imbalance, enum idle_type idle, int *sd_idle,
 		   cpumask_t *cpus)
 {
@@ -1955,6 +3136,12 @@ find_busiest_group(struct sched_domain *
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
 	unsigned long max_pull;
 	int load_idx;
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	int this_pcpu;
+
+	vsched = vcpu_vsched(this_cpu);
+	this_pcpu = vcpu_last_pcpu(this_cpu);
 
 	max_load = this_load = total_load = total_pwr = 0;
 	if (idle == NOT_IDLE)
@@ -1965,27 +3152,28 @@ find_busiest_group(struct sched_domain *
 		load_idx = sd->idle_idx;
 
 	do {
+		cpumask_t tmp;
 		unsigned long load;
 		int local_group;
 		int i;
 
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpu_isset(this_pcpu, group->cpumask);
 
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
+		cpus_and(tmp, group->cpumask, vsched_pcpu_running_map(vsched));
+		cpus_and(tmp, tmp, *cpus);
 
-		for_each_cpu_mask(i, group->cpumask) {
-			if (!cpu_isset(i, *cpus))
-				continue;
-
+		for_each_cpu_mask(i, tmp) {
+			vcpu = pcpu(i)->vcpu;
 			if (*sd_idle && !idle_cpu(i))
 				*sd_idle = 0;
 
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = target_load(i, load_idx);
+				load = target_load(vcpu, load_idx);
 			else
-				load = source_load(i, load_idx);
+				load = source_load(vcpu, load_idx);
 
 			avg_load += load;
 		}
@@ -2008,6 +3196,8 @@ find_busiest_group(struct sched_domain *
 
 	if (!busiest || this_load >= max_load || max_load <= SCHED_LOAD_SCALE)
 		goto out_balanced;
+	if (!this)
+		this = busiest; /* this->cpu_power is needed below */
 
 	avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
 
@@ -2090,25 +3280,28 @@ out_balanced:
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static runqueue_t *find_busiest_queue(struct sched_group *group,
+static vcpu_t find_busiest_queue(vcpu_t this_cpu, struct sched_group *group,
 	enum idle_type idle, cpumask_t *cpus)
 {
 	unsigned long load, max_load = 0;
-	runqueue_t *busiest = NULL;
+	struct vcpu_scheduler *vsched;	/* vsched of this_cpu; only its vcpus are scanned */
+	vcpu_t vcpu, busiest = NULL;	/* NULL is returned when no vcpu has load > 0 */
+	cpumask_t tmp;			/* group->cpumask restricted to *cpus */
 	int i;
 
-	for_each_cpu_mask(i, group->cpumask) {
-		if (!cpu_isset(i, *cpus))
-			continue;
-
-		load = source_load(i, 0);
+	vsched = vcpu_vsched(this_cpu);
+	cpus_and(tmp, group->cpumask, *cpus);
 
+	for_each_cpu_mask(i, vsched_vcpu_online_map(vsched)) {
+		vcpu = vsched_vcpu(vsched, i);
+		if (!cpu_isset(vcpu_last_pcpu(vcpu), tmp))	/* vcpu_last_pcpu() not in tmp: skip */
+			continue;
+		load = source_load(vcpu, 0);
 		if (load > max_load) {
 			max_load = load;
-			busiest = cpu_rq(i);
+			busiest = vcpu;
 		}
 	}
-
 	return busiest;
 }
 
@@ -2124,10 +3317,11 @@ static runqueue_t *find_busiest_queue(st
  *
  * Called with this_rq unlocked.
  */
-static int load_balance(int this_cpu, runqueue_t *this_rq,
+static int load_balance(vcpu_t this_cpu, runqueue_t *this_rq,
 			struct sched_domain *sd, enum idle_type idle)
 {
 	struct sched_group *group;
+	vcpu_t busiest_vcpu;
 	runqueue_t *busiest;
 	unsigned long imbalance;
 	int nr_moved, all_pinned = 0;
@@ -2141,6 +3335,17 @@ static int load_balance(int this_cpu, ru
 	schedstat_inc(sd, lb_cnt[idle]);
 
 redo:
+#ifdef CONFIG_SCHED_VCPU
+	if (likely(vcpu_vsched(this_cpu) == &idle_vsched)) {
+		/*
+		 * Find idle vcpu to balance to
+		 */
+		this_cpu = find_idle_target(&cpus);
+		if (!this_cpu)
+			goto out_balanced;
+		this_rq = vcpu_rq(this_cpu);
+	}
+#endif
 	group = find_busiest_group(sd, this_cpu, &imbalance, idle,
 			&sd_idle, &cpus);
 	if (!group) {
@@ -2148,13 +3353,16 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, idle, &cpus);
-	if (!busiest) {
+	busiest_vcpu = find_busiest_queue(this_cpu, group, idle, &cpus);
+	if (!busiest_vcpu) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
 	}
 
-	BUG_ON(busiest == this_rq);
+	busiest = vcpu_rq(busiest_vcpu);
+
+	if (unlikely(busiest == this_rq))
+		goto out_balanced;
 
 	schedstat_add(sd, lb_imbalance[idle], imbalance);
 
@@ -2173,7 +3381,7 @@ redo:
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
 		if (unlikely(all_pinned)) {
-			cpu_clear(busiest->cpu, cpus);
+			cpu_clear(vcpu_last_pcpu(busiest_vcpu), cpus);
 			if (!cpus_empty(cpus))
 				goto redo;
 			goto out_balanced;
@@ -2191,7 +3399,7 @@ redo:
 			/* don't kick the migration_thread, if the curr
 			 * task on busiest cpu can't be moved to this_cpu
 			 */
-			if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+			if (!vcpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
 				spin_unlock(&busiest->lock);
 				all_pinned = 1;
 				goto out_one_pinned;
@@ -2256,11 +3464,12 @@ out_one_pinned:
  * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
  * this_rq is locked.
  */
-static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
+static int load_balance_newidle(vcpu_t this_cpu, runqueue_t *this_rq,
 				struct sched_domain *sd)
 {
 	struct sched_group *group;
-	runqueue_t *busiest = NULL;
+	runqueue_t *busiest;
+	vcpu_t busiest_vcpu;
 	unsigned long imbalance;
 	int nr_moved = 0;
 	int sd_idle = 0;
@@ -2278,13 +3487,12 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, NEWLY_IDLE, &cpus);
-	if (!busiest) {
+	busiest_vcpu = find_busiest_queue(this_cpu, group, NEWLY_IDLE, &cpus);
+	if (!busiest_vcpu || busiest_vcpu == this_cpu) {
 		schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
 		goto out_balanced;
 	}
-
-	BUG_ON(busiest == this_rq);
+	busiest = vcpu_rq(busiest_vcpu);
 
 	schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance);
 
@@ -2297,7 +3505,7 @@ redo:
 		spin_unlock(&busiest->lock);
 
 		if (!nr_moved) {
-			cpu_clear(busiest->cpu, cpus);
+			cpu_clear(vcpu_last_pcpu(busiest_vcpu), cpus);
 			if (!cpus_empty(cpus))
 				goto redo;
 		}
@@ -2323,8 +3531,11 @@ out_balanced:
 /*
  * idle_balance is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
+ *
+ * Returns whether to continue with another runqueue
+ * instead of switching to idle.
  */
-static void idle_balance(int this_cpu, runqueue_t *this_rq)
+static int idle_balance(vcpu_t this_cpu, runqueue_t *this_rq)
 {
 	struct sched_domain *sd;
 
@@ -2332,10 +3543,11 @@ static void idle_balance(int this_cpu, r
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
 			if (load_balance_newidle(this_cpu, this_rq, sd)) {
 				/* We've pulled tasks over so stop searching */
-				break;
+				return 1;
 			}
 		}
 	}
+	return 0;
 }
 
 /*
@@ -2345,18 +3557,26 @@ static void idle_balance(int this_cpu, r
  * logical imbalances.
  *
  * Called with busiest_rq locked.
+ *
+ * In human terms: balancing of CPU load by moving tasks between CPUs is
+ * performed by 2 methods, push and pull.
+ * In certain places when CPU is found to be idle, it performs pull from busy
+ * CPU to current (idle) CPU.
+ * active_load_balance implements push method, with migration thread getting
+ * scheduled on a busy CPU (hence, making all running processes on this CPU sit
+ * in the queue) and selecting where to push and which task.
  */
-static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
+static void active_load_balance(runqueue_t *busiest_rq, vcpu_t busiest_cpu)
 {
 	struct sched_domain *sd;
 	runqueue_t *target_rq;
-	int target_cpu = busiest_rq->push_cpu;
+	vcpu_t target_cpu = busiest_rq->push_cpu;
 
 	if (busiest_rq->nr_running <= 1)
 		/* no task to move */
 		return;
 
-	target_rq = cpu_rq(target_cpu);
+	target_rq = vcpu_rq(target_cpu);
 
 	/*
 	 * This condition is "impossible", if it occurs
@@ -2368,10 +3588,17 @@ static void active_load_balance(runqueue
 	/* move a task from busiest_rq to target_rq */
 	double_lock_balance(busiest_rq, target_rq);
 
+	/*
+	 * Our main candidate where to push our tasks is busiest->push_cpu.
+	 * First, find the domain that spans over both that candidate CPU and
+	 * the current one.
+	 *
+	 * FIXME: make sure that push_cpu doesn't disappear before we get here.
+	 */
 	/* Search for an sd spanning us and the target CPU. */
 	for_each_domain(target_cpu, sd)
 		if ((sd->flags & SD_LOAD_BALANCE) &&
-			cpu_isset(busiest_cpu, sd->span))
+			cpu_isset(vcpu_last_pcpu(busiest_cpu), sd->span))
 				break;
 
 	if (unlikely(sd == NULL))
@@ -2397,31 +3624,17 @@ out:
  */
 
 /* Don't have all balancing operations going off at once */
-#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS)
+#define CPU_OFFSET(cpu) (HZ * (cpu) / NR_CPUS)
 
-static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
+static void rebalance_tick(vcpu_t this_cpu, runqueue_t *this_rq,
 			   enum idle_type idle)
 {
-	unsigned long old_load, this_load;
-	unsigned long j = jiffies + CPU_OFFSET(this_cpu);
+	unsigned long j;
 	struct sched_domain *sd;
-	int i;
 
-	this_load = this_rq->nr_running * SCHED_LOAD_SCALE;
 	/* Update our load */
-	for (i = 0; i < 3; i++) {
-		unsigned long new_load = this_load;
-		int scale = 1 << i;
-		old_load = this_rq->cpu_load[i];
-		/*
-		 * Round up the averaging division if load is increasing. This
-		 * prevents us from getting stuck on 9 if the load is 10, for
-		 * example.
-		 */
-		if (new_load > old_load)
-			new_load += scale-1;
-		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale;
-	}
+	update_rq_cpu_load(this_rq);
+	j = jiffies + CPU_OFFSET(smp_processor_id());
 
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
@@ -2455,17 +3668,20 @@ static void rebalance_tick(int this_cpu,
 /*
  * on UP we do not need to balance between CPUs:
  */
-static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle)
+static inline void rebalance_tick(vcpu_t cpu, runqueue_t *rq, enum idle_type idle)
 {
 }
-static inline void idle_balance(int cpu, runqueue_t *rq)
+static inline int idle_balance(vcpu_t cpu, runqueue_t *rq)
 {
+	return 0; /* UP: nothing was pulled, same as the SMP "no task moved" result */
 }
 #endif
 
-static inline int wake_priority_sleeper(runqueue_t *rq)
+static inline int wake_priority_sleeper(runqueue_t *rq, task_t *idle)
 {
 	int ret = 0;
+#ifndef CONFIG_SCHED_VCPU
+	/* FIXME: can we implement SMT priority sleeping for this? */
 #ifdef CONFIG_SCHED_SMT
 	spin_lock(&rq->lock);
 	/*
@@ -2473,11 +3688,13 @@ static inline int wake_priority_sleeper(
 	 * reasons reschedule the idle task to see if it can now run.
 	 */
 	if (rq->nr_running) {
-		resched_task(rq->idle);
+		/* FIXME */
+		resched_task(idle);
 		ret = 1;
 	}
 	spin_unlock(&rq->lock);
 #endif
+#endif
 	return ret;
 }
 
@@ -2511,6 +3728,15 @@ unsigned long long current_sched_time(co
 	return ns;
 }
 
+#ifdef CONFIG_VE
+#define update_ve_cpu_time(p, time, tick)	do {		\
+		VE_CPU_STATS((p)->ve_task_info.owner_env,	\
+			task_cpu(p))->time += tick;		\
+	} while (0)
+#else
+#define update_ve_cpu_time(p, time, tick)	do { } while (0)
+#endif
+
 /*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
@@ -2526,10 +3752,13 @@ void account_user_time(struct task_struc
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
-	if (TASK_NICE(p) > 0)
+	if (TASK_NICE(p) > 0) {
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
-	else
+		update_ve_cpu_time(p, nice, tmp);
+	} else {
 		cpustat->user = cputime64_add(cpustat->user, tmp);
+		update_ve_cpu_time(p, user, tmp);
+	}
 }
 
 /*
@@ -2542,20 +3771,22 @@ void account_system_time(struct task_str
 			 cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	runqueue_t *rq = this_rq();
+	int this_pcpu = raw_smp_processor_id();
 	cputime64_t tmp;
 
 	p->stime = cputime_add(p->stime, cputime);
+	tmp = cputime_to_cputime64(cputime);
+
+	update_ve_cpu_time(p, system, tmp);
 
 	/* Add system time to cpustat. */
-	tmp = cputime_to_cputime64(cputime);
 	if (hardirq_count() - hardirq_offset)
 		cpustat->irq = cputime64_add(cpustat->irq, tmp);
 	else if (softirq_count())
 		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-	else if (p != rq->idle)
+	else if (p != this_pcpu()->idle)
 		cpustat->system = cputime64_add(cpustat->system, tmp);
-	else if (atomic_read(&rq->nr_iowait) > 0)
+	else if ((atomic_read(&glob_task_nrs[this_pcpu].nr_iowait) > 0))
 		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 	else
 		cpustat->idle = cputime64_add(cpustat->idle, tmp);
@@ -2574,7 +3805,7 @@ void account_steal_time(struct task_stru
 	cputime64_t tmp = cputime_to_cputime64(steal);
 	runqueue_t *rq = this_rq();
 
-	if (p == rq->idle) {
+	if (p == this_pcpu()->idle) {
 		p->stime = cputime_add(p->stime, steal);
 		if (atomic_read(&rq->nr_iowait) > 0)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
@@ -2594,18 +3825,23 @@ void account_steal_time(struct task_stru
 void scheduler_tick(void)
 {
 	int cpu = smp_processor_id();
-	runqueue_t *rq = this_rq();
+	vcpu_t vcpu;
+	runqueue_t *rq;
 	task_t *p = current;
 	unsigned long long now = sched_clock();
 
+	vcpu = this_vcpu();
+	rq = vcpu_rq(vcpu);
 	update_cpu_clock(p, rq, now);
 
 	rq->timestamp_last_tick = now;
 
-	if (p == rq->idle) {
-		if (wake_priority_sleeper(rq))
+	set_tsk_need_resched(p); //FIXME
+
+	if (p == pcpu(cpu)->idle) {
+		if (wake_priority_sleeper(rq, pcpu(cpu)->idle))
 			goto out;
-		rebalance_tick(cpu, rq, SCHED_IDLE);
+		rebalance_tick(vcpu, rq, SCHED_IDLE);
 		return;
 	}
 
@@ -2681,10 +3917,14 @@ void scheduler_tick(void)
 out_unlock:
 	spin_unlock(&rq->lock);
 out:
-	rebalance_tick(cpu, rq, NOT_IDLE);
+	rebalance_tick(vcpu, rq, NOT_IDLE);
 }
 
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_SMT) && !defined(CONFIG_SCHED_VCPU)
+/* FIXME: SMT scheduling
+ * rq->cpu is initialized with rq address if FAIRSCHED is on
+ * this is not correct for SMT case
+ */
 static inline void wakeup_busy_runqueue(runqueue_t *rq)
 {
 	/* If an SMT runqueue is sleeping due to priority reasons wake it up */
@@ -2692,7 +3932,7 @@ static inline void wakeup_busy_runqueue(
 		resched_task(rq->idle);
 }
 
-static void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+static void wake_sleeping_dependent(vcpu_t this_cpu)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	cpumask_t sibling_map;
@@ -2746,7 +3986,7 @@ static inline unsigned long smt_slice(ta
 	return p->time_slice * (100 - sd->per_cpu_gain) / 100;
 }
 
-static int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static int dependent_sleeper(vcpu_t this_cpu)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	cpumask_t sibling_map;
@@ -2847,11 +4087,11 @@ out_unlock:
 	return ret;
 }
 #else
-static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+static inline void wake_sleeping_dependent(vcpu_t this_cpu)
 {
 }
 
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static inline int dependent_sleeper(vcpu_t this_cpu)
 {
 	return 0;
 }
@@ -2901,7 +4141,9 @@ asmlinkage void __sched schedule(void)
 	struct list_head *queue;
 	unsigned long long now;
 	unsigned long run_time;
-	int cpu, idx, new_prio;
+	int idx, new_prio;
+	vcpu_t vcpu;
+	cycles_t cycles;
 
 #if defined(CONFIG_LKCD_DUMP) || defined(CONFIG_LKCD_DUMP_MODULE)
 	/*
@@ -2939,13 +4181,14 @@ need_resched:
 	prev = current;
 	release_kernel_lock(prev);
 need_resched_nonpreemptible:
+	cycles = get_cycles();
 	rq = this_rq();
 
 	/*
 	 * The idle thread is not allowed to schedule!
 	 * Remove this check after it has been exercised a bit.
 	 */
-	if (unlikely(prev == rq->idle) && prev->state != TASK_RUNNING) {
+	if (unlikely(prev == this_pcpu()->idle) && prev->state != TASK_RUNNING) {
 		printk(KERN_ERR "bad: scheduling from the idle thread!\n");
 		dump_stack();
 	}
@@ -2983,25 +4226,34 @@ need_resched_nonpreemptible:
 		}
 	}
 
-	cpu = smp_processor_id();
+	prev->sleep_avg -= run_time;
+	if ((long)prev->sleep_avg <= 0)
+		prev->sleep_avg = 0;
+
+	vcpu = rq_vcpu(rq);
+	if (rq->nr_running && vcpu_is_hot(vcpu))
+		goto same_vcpu;
+
+	if (unlikely(!rq->nr_running))
+		idle_balance(vcpu, rq);
+	vcpu = schedule_vcpu(vcpu, cycles);
+	rq = vcpu_rq(vcpu);
+
 	if (unlikely(!rq->nr_running)) {
 go_idle:
-		idle_balance(cpu, rq);
-		if (!rq->nr_running) {
-			next = rq->idle;
-			rq->expired_timestamp = 0;
-			wake_sleeping_dependent(cpu, rq);
-			/*
-			 * wake_sleeping_dependent() might have released
-			 * the runqueue, so break out if we got new
-			 * tasks meanwhile:
-			 */
-			if (!rq->nr_running)
-				goto switch_tasks;
-		}
+		next = this_pcpu()->idle;
+		rq->expired_timestamp = 0;
+		wake_sleeping_dependent(vcpu);
+		/*
+		 * wake_sleeping_dependent() might have released
+		 * the runqueue, so break out if we got new
+		 * tasks meanwhile:
+		 */
+		if (!rq->nr_running)
+			goto switch_tasks;
 	} else {
-		if (dependent_sleeper(cpu, rq)) {
-			next = rq->idle;
+		if (dependent_sleeper(vcpu)) {
+			next = this_pcpu()->idle;
 			goto switch_tasks;
 		}
 		/*
@@ -3013,6 +4265,7 @@ go_idle:
 			goto go_idle;
 	}
 
+same_vcpu:
 	array = rq->active;
 	if (unlikely(!array->nr_active)) {
 		/*
@@ -3049,28 +4302,50 @@ go_idle:
 			requeue_task(next, array);
 	}
 	next->activated = 0;
+
 switch_tasks:
-	if (next == rq->idle)
+	if (next == this_pcpu()->idle)
 		schedstat_inc(rq, sched_goidle);
 	prefetch(next);
 	prefetch_stack(next);
 	clear_tsk_need_resched(prev);
-	rcu_qsctr_inc(task_cpu(prev));
+	rcu_qsctr_inc(task_pcpu(prev));
 
 	update_cpu_clock(prev, rq, now);
 
-	prev->sleep_avg -= run_time;
-	if ((long)prev->sleep_avg <= 0)
-		prev->sleep_avg = 0;
+	/* updated w/o rq->lock, which is ok due to after-read-checks */
 	prev->timestamp = prev->last_ran = now;
 
 	sched_info_switch(prev, next);
 	if (likely(prev != next)) {
+		cycles_t cycles;
+
+		/* current physical CPU id should be valid after switch */
+		set_task_vcpu(next, vcpu);
+		set_task_pcpu(next, task_pcpu(prev));
+		cycles = get_cycles();
 		next->timestamp = now;
 		rq->nr_switches++;
+		glob_task_nrs[smp_processor_id()].nr_switches++;
 		rq->curr = next;
 		++*switch_count;
 
+#ifdef CONFIG_VE
+		prev->ve_task_info.sleep_stamp = cycles;
+		if (prev->state == TASK_RUNNING && prev != this_pcpu()->idle)
+			write_wakeup_stamp(prev, cycles);
+		update_sched_lat(next, cycles);
+
+		/* because next & prev are protected with
+		 * runqueue lock we need not worry about
+		 * wakeup_stamp and sched_time protection
+		 * (same thing in 'else' branch below)
+		 */
+		update_ve_task_info(prev, cycles);
+		next->ve_task_info.sched_time = cycles;
+		write_wakeup_stamp(next, 0);
+#endif
+
 		prepare_task_switch(rq, next);
 		prev = context_switch(rq, prev, next);
 		barrier();
@@ -3080,8 +4355,10 @@ switch_tasks:
 		 * frame will be invalid.
 		 */
 		finish_task_switch(this_rq(), prev);
-	} else
+	} else {
+		update_ve_task_info(prev, get_cycles());
 		spin_unlock_irq(&rq->lock);
+	}
 
 	prev = current;
 	if (unlikely(reacquire_kernel_lock(prev) < 0))
@@ -3623,30 +4900,12 @@ int task_nice(const task_t *p)
 EXPORT_SYMBOL_GPL(task_nice);
 
 /**
- * idle_cpu - is a given cpu idle currently?
- * @cpu: the processor in question.
- */
-int idle_cpu(int cpu)
-{
-	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
-}
-
-/**
- * idle_task - return the idle task for a given cpu.
- * @cpu: the processor in question.
- */
-task_t *idle_task(int cpu)
-{
-	return cpu_rq(cpu)->idle;
-}
-
-/**
  * find_process_by_pid - find a process with a matching PID value.
  * @pid: the pid in question.
  */
 static inline task_t *find_process_by_pid(pid_t pid)
 {
-	return pid ? find_task_by_pid(pid) : current;
+	return pid ? find_task_by_pid_ve(pid) : current;
 }
 
 /* Actually do priority change: must hold rq lock. */
@@ -3706,7 +4965,7 @@ recheck:
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
 	 */
-	if (!capable(CAP_SYS_NICE)) {
+	if (!capable(CAP_SYS_ADMIN)) {
 		/*
 		 * can't change policy, except between SCHED_NORMAL
 		 * and SCHED_BATCH:
@@ -3786,6 +5045,8 @@ do_sched_setscheduler(pid_t pid, int pol
 	return retval;
 }
 
+EXPORT_SYMBOL(do_sched_setscheduler);
+
 /**
  * sys_sched_setscheduler - set/change the scheduler policy and RT priority
  * @pid: the pid in question.
@@ -4163,11 +5424,20 @@ EXPORT_SYMBOL(yield);
  */
 void __sched io_schedule(void)
 {
-	struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+	struct runqueue *rq = this_rq();
+	int cpu;
+
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+	ve = current->ve_task_info.owner_env;
+#endif
 
 	delayacct_blkio_start();
+	cpu = raw_smp_processor_id();
 	atomic_inc(&rq->nr_iowait);
+	nr_iowait_inc(cpu);
 	schedule();
+	nr_iowait_dec(cpu);
 	atomic_dec(&rq->nr_iowait);
 	delayacct_blkio_end();
 }
@@ -4176,12 +5446,21 @@ EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
 {
-	struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+	struct runqueue *rq = this_rq();
 	long ret;
+	int cpu;
+
+#ifdef CONFIG_VE
+	struct ve_struct *ve;
+	ve = current->ve_task_info.owner_env;
+#endif
 
 	delayacct_blkio_start();
+	cpu = raw_smp_processor_id();
 	atomic_inc(&rq->nr_iowait);
+	nr_iowait_inc(cpu);
 	ret = schedule_timeout(timeout);
+	nr_iowait_dec(cpu);
 	atomic_dec(&rq->nr_iowait);
 	delayacct_blkio_end();
 	return ret;
@@ -4305,15 +5584,9 @@ static void show_task(task_t *p)
 	else
 		printk("?");
 #if (BITS_PER_LONG == 32)
-	if (state == TASK_RUNNING)
-		printk(" running ");
-	else
-		printk(" %08lX ", thread_saved_pc(p));
+	printk(" %08lX ", (unsigned long)p);
 #else
-	if (state == TASK_RUNNING)
-		printk("  running task   ");
-	else
-		printk(" %016lx ", thread_saved_pc(p));
+	printk(" %016lx ", (unsigned long)p);
 #endif
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	{
@@ -4352,26 +5625,43 @@ void show_state(void)
 #if (BITS_PER_LONG == 32)
 	printk("\n"
 	       "                                               sibling\n");
-	printk("  task             PC      pid father child younger older\n");
+	printk("  task       taskaddr      pid father child younger older\n");
 #else
 	printk("\n"
 	       "                                                       sibling\n");
-	printk("  task                 PC          pid father child younger older\n");
+	printk("  task           taskaddr          pid father child younger older\n");
 #endif
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		/*
 		 * reset the NMI-timeout, listing all files on a slow
 		 * console might take alot of time:
 		 */
 		touch_nmi_watchdog();
 		show_task(p);
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 
 	read_unlock(&tasklist_lock);
 	mutex_debug_show_all_locks();
 }
 
+#ifdef CONFIG_SCHED_VCPU
+static void init_boot_vcpus(long cpu)
+{
+	if (vsched_vcpu(&idle_vsched, cpu) != NULL)
+		return;
+
+	if (__add_vcpu(&idle_vsched, cpu) != 0)
+		panic("Can't create idle vcpu %ld\n", cpu);
+
+	/* Also create vcpu for default_vsched */
+	if (__add_vcpu(&default_vsched, cpu) != 0)
+		panic("Can't create default vcpu %ld\n", cpu);
+
+	cpu_set(cpu, idle_vsched.pcpu_running_map);
+}
+#endif
+
 /**
  * init_idle - set up an idle thread for a given CPU
  * @idle: task in question
@@ -4382,22 +5672,51 @@ void show_state(void)
  */
 void __cpuinit init_idle(task_t *idle, int cpu)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	struct vcpu_scheduler *vsched;
+	vcpu_t vcpu;
+	runqueue_t *rq;
 	unsigned long flags;
 
+#ifdef CONFIG_SCHED_VCPU
+	init_boot_vcpus(cpu);
+	vsched = &idle_vsched;
+#else
+	vsched = NULL;
+#endif
+	vcpu = vsched_vcpu(vsched, cpu);
+	rq = vcpu_rq(vcpu);
+
 	idle->timestamp = sched_clock();
 	idle->sleep_avg = 0;
 	idle->array = NULL;
 	idle->prio = MAX_PRIO;
 	idle->state = TASK_RUNNING;
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
+	set_task_vsched(idle, &idle_vsched);
 	set_task_cpu(idle, cpu);
 
 	spin_lock_irqsave(&rq->lock, flags);
-	rq->curr = rq->idle = idle;
+	pcpu(cpu)->idle = idle;
+	rq->curr = idle;
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	idle->oncpu = 1;
 #endif
+	set_task_pcpu(idle, cpu);
+	set_task_vsched(idle, vsched);
+	set_task_vcpu(idle, vcpu);
+#ifdef CONFIG_SCHED_VCPU
+	/* the following code is very close to vcpu_get */
+	spin_lock(&fairsched_lock);
+	pcpu(cpu)->vcpu = vcpu;
+	pcpu(cpu)->vsched = vcpu->vsched;
+	list_move_tail(&vcpu->list, &vsched->running_list);
+	__set_bit(cpu, vsched->vcpu_running_map.bits);
+	__set_bit(cpu, vsched->pcpu_running_map.bits);
+	vcpu->running = 1;
+	spin_unlock(&fairsched_lock);
+#else
+	pcpu(cpu)->vcpu = vcpu;
+#endif
 	spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
@@ -4417,7 +5736,6 @@ void __cpuinit init_idle(task_t *idle, i
  */
 cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 
-#ifdef CONFIG_SMP
 /*
  * This is how migration works:
  *
@@ -4434,6 +5752,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
  * 7) we wake up and the migration is done.
  */
 
+#ifdef CONFIG_SMP
 /*
  * Change a given task's CPU affinity. Migrate the thread to a
  * proper CPU and schedule it away if the CPU it's executing on
@@ -4449,9 +5768,11 @@ int set_cpus_allowed(task_t *p, cpumask_
 	int ret = 0;
 	migration_req_t req;
 	runqueue_t *rq;
+	struct vcpu_scheduler *vsched;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpus_intersects(new_mask, cpu_online_map)) {
+	vsched = task_vsched(p);
+	if (!cpus_intersects(new_mask, vsched_vcpu_online_map(vsched))) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -4461,7 +5782,8 @@ int set_cpus_allowed(task_t *p, cpumask_
 	if (cpu_isset(task_cpu(p), new_mask))
 		goto out;
 
-	if (migrate_task(p, any_online_cpu(new_mask), &req)) {
+	if (migrate_task(p, vsched_vcpu(vsched, any_online_cpu(new_mask)),
+								&req)) {
 		/* Need help from migration thread: drop lock and wait. */
 		task_rq_unlock(rq, &flags);
 		wake_up_process(rq->migration_thread);
@@ -4475,6 +5797,7 @@ out:
 }
 
 EXPORT_SYMBOL_GPL(set_cpus_allowed);
+#endif
 
 /*
  * Move (not current) task off this cpu, onto dest cpu.  We're doing
@@ -4485,25 +5808,31 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
  * So we race with normal scheduler movements, but that's OK, as long
  * as the task is no longer on this CPU.
  */
-static void __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+static int __migrate_task(struct task_struct *p, vcpu_t src_cpu, vcpu_t dest_cpu)
 {
 	runqueue_t *rq_dest, *rq_src;
+	int res = 0;
 
-	if (unlikely(cpu_is_offline(dest_cpu)))
-		return;
+	if (unlikely(vcpu_is_offline(dest_cpu)))
+		return 0;
 
-	rq_src = cpu_rq(src_cpu);
-	rq_dest = cpu_rq(dest_cpu);
+#ifdef CONFIG_SCHED_VCPU
+	BUG_ON(vcpu_vsched(src_cpu) == &idle_vsched);
+#endif
+	rq_src = vcpu_rq(src_cpu);
+	rq_dest = vcpu_rq(dest_cpu);
 
 	double_rq_lock(rq_src, rq_dest);
 	/* Already moved. */
-	if (task_cpu(p) != src_cpu)
+	if (task_vcpu(p) != src_cpu)
 		goto out;
 	/* Affinity changed (again). */
-	if (!cpu_isset(dest_cpu, p->cpus_allowed))
+	if (!vcpu_isset(dest_cpu, p->cpus_allowed))
 		goto out;
 
-	set_task_cpu(p, dest_cpu);
+	BUG_ON(task_running(rq_src, p));
+	set_task_vsched(p, vcpu_vsched(dest_cpu));
+	set_task_vcpu(p, dest_cpu);
 	if (p->array) {
 		/*
 		 * Sync timestamp with rq_dest's before activating.
@@ -4518,9 +5847,11 @@ static void __migrate_task(struct task_s
 		if (TASK_PREEMPTS_CURR(p, rq_dest))
 			resched_task(rq_dest->curr);
 	}
+	res = 1;
 
 out:
 	double_rq_unlock(rq_src, rq_dest);
+	return res;
 }
 
 /*
@@ -4528,13 +5859,21 @@ out:
  * thread migration by bumping thread off CPU then 'pushing' onto
  * another runqueue.
  */
+#if defined (CONFIG_HOTPLUG_CPU) || defined (CONFIG_SCHED_VCPU)
+static void migrate_live_tasks(vcpu_t src_cpu);
+static void migrate_dead_tasks(vcpu_t dead_cpu);
+#endif
 static int migration_thread(void *data)
 {
 	runqueue_t *rq;
-	int cpu = (long)data;
+	vcpu_t cpu = (vcpu_t)data;
 
-	rq = cpu_rq(cpu);
+	rq = vcpu_rq(cpu);
 	BUG_ON(rq->migration_thread != current);
+	BUG_ON(!rq->migration_thread_init);
+
+	/* migration thread startup has completed */
+	rq->migration_thread_init = 0;
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	while (!kthread_should_stop()) {
@@ -4545,15 +5884,17 @@ static int migration_thread(void *data)
 
 		spin_lock_irq(&rq->lock);
 
-		if (cpu_is_offline(cpu)) {
+		if (vcpu_is_offline(cpu)) {
 			spin_unlock_irq(&rq->lock);
 			goto wait_to_die;
 		}
 
+#ifdef CONFIG_SMP
 		if (rq->active_balance) {
 			active_load_balance(rq, cpu);
 			rq->active_balance = 0;
 		}
+#endif
 
 		head = &rq->migration_queue;
 
@@ -4572,8 +5913,7 @@ static int migration_thread(void *data)
 
 		complete(&req->done);
 	}
-	__set_current_state(TASK_RUNNING);
-	return 0;
+	goto die;
 
 wait_to_die:
 	/* Wait for kthread_stop */
@@ -4582,18 +5922,34 @@ wait_to_die:
 		schedule();
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
+die:
 	__set_current_state(TASK_RUNNING);
+#if defined (CONFIG_HOTPLUG_CPU) || defined (CONFIG_SCHED_VCPU)
+	migrate_live_tasks(cpu);
+	spin_lock_irq(&rq->lock);
+	migrate_dead_tasks(cpu);
+	spin_unlock_irq(&rq->lock);
+#endif
 	return 0;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-/* Figure out where task on dead CPU should go, use force if neccessary. */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk)
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_SCHED_VCPU)
+/*
+ * Figure out where task on dead CPU should go, use force if necessary.
+ * NOTE: interrupts should be disabled by the caller
+ */
+static void move_task_off_dead_cpu(vcpu_t dead_cpu, struct task_struct *tsk)
 {
 	int dest_cpu;
+	struct vcpu_scheduler *vsched;
 	cpumask_t mask;
+	runqueue_t *rq;
+	unsigned long flags;
 
+restart:
 	/* On same node? */
+#ifndef CONFIG_SCHED_VCPU
+#error FIXME: wrong code
 	mask = node_to_cpumask(cpu_to_node(dead_cpu));
 	cpus_and(mask, mask, tsk->cpus_allowed);
 	dest_cpu = any_online_cpu(mask);
@@ -4617,9 +5973,28 @@ static void move_task_off_dead_cpu(int d
 			       "longer affine to cpu%d\n",
 			       tsk->pid, tsk->comm, dead_cpu);
 	}
-	__migrate_task(tsk, dead_cpu, dest_cpu);
+#else
+	vsched = vcpu_vsched(dead_cpu);
+	cpus_and(mask, vsched_vcpu_online_map(vsched), tsk->cpus_allowed);
+	dest_cpu = any_online_cpu(mask);
+
+	/* On any allowed CPU? */
+	if (dest_cpu == NR_CPUS) {
+		rq = task_rq_lock(tsk, &flags);
+		cpus_setall(tsk->cpus_allowed);
+		task_rq_unlock(rq, &flags);
+		dest_cpu = any_online_cpu(vsched_vcpu_online_map(vsched));
+	}
+	/* this can happen only when non-empty node is removed... */
+	if (dest_cpu == NR_CPUS)
+		printk("BUG: no where to move task %s(%d)\n",
+				tsk->comm, tsk->pid);
+#endif
+	if (!__migrate_task(tsk, dead_cpu, vsched_vcpu(vsched, dest_cpu)))
+		goto restart;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * While a dead CPU has no uninterruptible tasks queued at this point,
  * it might still have a nonzero ->nr_uninterruptible counter, because
@@ -4639,25 +6014,30 @@ static void migrate_nr_uninterruptible(r
 	double_rq_unlock(rq_src, rq_dest);
 	local_irq_restore(flags);
 }
+#endif
 
 /* Run through task list and migrate tasks from the dead cpu. */
-static void migrate_live_tasks(int src_cpu)
+static void migrate_live_tasks(vcpu_t src_cpu)
 {
 	struct task_struct *tsk, *t;
 
+	BUG_ON(vcpu_isset(src_cpu, vsched_vcpu_online_map(vcpu_vsched(src_cpu))));
 	write_lock_irq(&tasklist_lock);
 
-	do_each_thread(t, tsk) {
+	do_each_thread_all(t, tsk) {
 		if (tsk == current)
 			continue;
+		if (tsk == vcpu_rq(src_cpu)->migration_thread)
+			continue;
 
-		if (task_cpu(tsk) == src_cpu)
+		if (task_vcpu(tsk) == src_cpu)
 			move_task_off_dead_cpu(src_cpu, tsk);
-	} while_each_thread(t, tsk);
+	} while_each_thread_all(t, tsk);
 
 	write_unlock_irq(&tasklist_lock);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /* Schedules idle task to be the next runnable task on current CPU.
  * It does so by boosting its priority to highest possible and adding it to
  * the _front_ of runqueue. Used by CPU offline code.
@@ -4679,6 +6059,9 @@ void sched_idle_next(void)
 
 	__setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
 	/* Add idle task to _front_ of it's priority queue */
+#ifdef CONFIG_SCHED_VCPU
+#error "FIXME: VCPU vs. HOTPLUG: fix the code below"
+#endif
 	__activate_idle_task(p, rq);
 
 	spin_unlock_irqrestore(&rq->lock, flags);
@@ -4697,10 +6080,11 @@ void idle_task_exit(void)
 		switch_mm(mm, &init_mm, current);
 	mmdrop(mm);
 }
+#endif /* CONFIG_HOTPLUG_CPU */
 
-static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
+static void migrate_dead(vcpu_t dead_cpu, task_t *tsk)
 {
-	struct runqueue *rq = cpu_rq(dead_cpu);
+	struct runqueue *rq = vcpu_rq(dead_cpu);
 
 	/* Must be exiting, otherwise would be on tasklist. */
 	BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD);
@@ -4715,82 +6099,141 @@ static void migrate_dead(unsigned int de
 	 * that's OK.  No task can be added to this CPU, so iteration is
 	 * fine.
 	 */
-	spin_unlock_irq(&rq->lock);
+	spin_unlock(&rq->lock);
 	move_task_off_dead_cpu(dead_cpu, tsk);
-	spin_lock_irq(&rq->lock);
+	spin_lock(&rq->lock);
 
 	put_task_struct(tsk);
 }
 
 /* release_task() removes task from tasklist, so we won't find dead tasks. */
-static void migrate_dead_tasks(unsigned int dead_cpu)
+static void migrate_dead_tasks(vcpu_t dead_cpu)
 {
 	unsigned arr, i;
-	struct runqueue *rq = cpu_rq(dead_cpu);
+	struct runqueue *rq = vcpu_rq(dead_cpu);
 
 	for (arr = 0; arr < 2; arr++) {
 		for (i = 0; i < MAX_PRIO; i++) {
 			struct list_head *list = &rq->arrays[arr].queue[i];
-			while (!list_empty(list))
-				migrate_dead(dead_cpu,
-					     list_entry(list->next, task_t,
-							run_list));
+			task_t *tsk;
+restart:
+			list_for_each_entry(tsk, list, run_list) {
+				if (tsk == rq->migration_thread)
+					continue;
+				migrate_dead(dead_cpu, tsk);
+				goto restart;
+			}
 		}
 	}
 }
-#endif /* CONFIG_HOTPLUG_CPU */
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_SCHED_VCPU */
+
+static void migration_thread_bind(struct task_struct *k, vcpu_t cpu)
+{
+	BUG_ON(k->state != TASK_INTERRUPTIBLE);
+	/* Must have done schedule() in kthread() before we set_task_cpu */
+	wait_task_inactive(k);
+
+	set_task_vsched(k, vcpu_vsched(cpu));
+	set_task_vcpu(k, cpu);
+	k->cpus_allowed = cpumask_of_cpu(cpu->id);
+}
+
+static void migration_thread_stop(runqueue_t *rq)
+{
+	struct task_struct *thread;
+
+	thread = rq->migration_thread;
+	if (thread == NULL)
+		return;
+
+	/*
+	 * Wait until the migration thread has really started, i.e. until
+	 * migration_thread() has been entered and has cleared
+	 * rq->migration_thread_init. This matters because the thread may
+	 * still be sleeping after creation while its vcpu is already marked
+	 * online, so tasks can migrate to this cpu. If we stop a thread that
+	 * has not started, migration_thread() is never called at all (see
+	 * how kthread() works), and then there is no way to move tasks away
+	 * from the thread's vcpu: rq->nr_running would stay != 0 even after
+	 * the migration thread is dead.
+	 */
+	while (rq->migration_thread_init)
+		yield();
+
+	get_task_struct(thread);
+	if (kthread_stop(thread) == -EINTR)
+		/*
+		 * Somebody else has called kthread_stop() without
+		 * waiting for the migration thread init to complete.
+		 */
+		BUG_ON(1);
+
+	/* We MUST ensure that the do_exit of the migration thread has
+	 * completed and that it will never be scheduled again before
+	 * vsched_destroy. A task with the PF_DEAD flag set will never
+	 * receive the CPU again once it has been descheduled. */
+	while (!(thread->flags & PF_DEAD) || task_running(rq, thread))
+		yield();
+	put_task_struct(thread);
+
+	rq->migration_thread = NULL;
+}
 
 /*
  * migration_call - callback that gets triggered when a CPU is added.
  * Here we can start up the necessary migration thread for the new CPU.
  */
-static int migration_call(struct notifier_block *nfb, unsigned long action,
+static int vmigration_call(struct notifier_block *nfb, unsigned long action,
 			  void *hcpu)
 {
-	int cpu = (long)hcpu;
+	vcpu_t cpu = (vcpu_t)hcpu;
 	struct task_struct *p;
 	struct runqueue *rq;
 	unsigned long flags;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-		p = kthread_create(migration_thread, hcpu, "migration/%d",cpu);
+		p = kthread_create(migration_thread, hcpu, "migration/%d/%d",
+			vsched_id(vcpu_vsched(cpu)), cpu->id);
 		if (IS_ERR(p))
 			return NOTIFY_BAD;
 		p->flags |= PF_NOFREEZE;
-		kthread_bind(p, cpu);
-		/* Must be high prio: stop_machine expects to yield to it. */
+
+		migration_thread_bind(p, cpu);
 		rq = task_rq_lock(p, &flags);
+		/* Must be high prio: stop_machine expects to yield to it. */
 		__setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
 		task_rq_unlock(rq, &flags);
-		cpu_rq(cpu)->migration_thread = p;
+		vcpu_rq(cpu)->migration_thread = p;
+		vcpu_rq(cpu)->migration_thread_init = 1;
+		cpu_set(cpu->id, vsched_vcpu_online_map(vcpu_vsched(cpu)));
 		break;
 	case CPU_ONLINE:
 		/* Strictly unneccessary, as first user will wake it. */
-		wake_up_process(cpu_rq(cpu)->migration_thread);
+		wake_up_process(vcpu_rq(cpu)->migration_thread);
 		break;
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_SCHED_VCPU)
+#error "FIXME: CPU down code doesn't work yet with VCPUs"
+#endif
 	case CPU_UP_CANCELED:
 		/* Unbind it from offline cpu so it can run.  Fall thru. */
-		kthread_bind(cpu_rq(cpu)->migration_thread,
-			     any_online_cpu(cpu_online_map));
-		kthread_stop(cpu_rq(cpu)->migration_thread);
-		cpu_rq(cpu)->migration_thread = NULL;
+		migration_thread_bind(vcpu_rq(cpu)->migration_thread, this_vcpu());
+		migration_thread_stop(vcpu_rq(cpu));
 		break;
 	case CPU_DEAD:
-		migrate_live_tasks(cpu);
-		rq = cpu_rq(cpu);
-		kthread_stop(rq->migration_thread);
-		rq->migration_thread = NULL;
+		rq = vcpu_rq(cpu);
+		migration_thread_stop(rq);
+#ifdef CONFIG_HOTPLUG_CPU
 		/* Idle task back to normal (off runqueue, low prio) */
 		rq = task_rq_lock(rq->idle, &flags);
 		deactivate_task(rq->idle, rq);
 		rq->idle->static_prio = MAX_PRIO;
 		__setscheduler(rq->idle, SCHED_NORMAL, 0);
-		migrate_dead_tasks(cpu);
 		task_rq_unlock(rq, &flags);
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
+#endif
 
 		/* No need to migrate the tasks: it was best-effort if
 		 * they didn't do lock_cpu_hotplug().  Just wake up
@@ -4805,11 +6248,21 @@ static int migration_call(struct notifie
 		}
 		spin_unlock_irq(&rq->lock);
 		break;
-#endif
 	}
 	return NOTIFY_OK;
 }
 
+static int migration_call(struct notifier_block *nfb, unsigned long action,
+			  void *hcpu)
+{
+#ifdef CONFIG_SCHED_VCPU
+	if (action == CPU_UP_PREPARE)
+		init_boot_vcpus((long)hcpu);
+#endif
+	/* we need to translate pcpu to vcpu */
+	return vmigration_call(nfb, action, vsched_default_vcpu((long)hcpu));
+}
+
 /* Register at highest priority so that task migration (migrate_all_tasks)
  * happens before everything else.
  */
@@ -4827,7 +6280,6 @@ int __init migration_init(void)
 	register_cpu_notifier(&migration_notifier);
 	return 0;
 }
-#endif
 
 #ifdef CONFIG_SMP
 #undef SCHED_DOMAIN_DEBUG
@@ -4855,7 +6307,7 @@ static void sched_domain_debug(struct sc
 		printk(KERN_DEBUG);
 		for (i = 0; i < level + 1; i++)
 			printk(" ");
-		printk("domain %d: ", level);
+		printk("domain %d, flags %x: ", level, sd->flags);
 
 		if (!(sd->flags & SD_LOAD_BALANCE)) {
 			printk("does not load-balance\n");
@@ -4980,7 +6432,7 @@ static int sd_parent_degenerate(struct s
  */
 static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	runqueue_t *rq = vcpu_rq(vsched_default_vcpu(cpu));
 	struct sched_domain *tmp;
 
 	/* Remove the sched domains which do not contribute to scheduling. */
@@ -4997,6 +6449,7 @@ static void cpu_attach_domain(struct sch
 
 	sched_domain_debug(sd, cpu);
 
+	rcu_assign_pointer(pcpu(cpu)->sd, sd);
 	rcu_assign_pointer(rq->sd, sd);
 }
 
@@ -5177,7 +6630,7 @@ static unsigned long domain_distance(int
 	unsigned long distance = 0;
 	struct sched_domain *sd;
 
-	for_each_domain(cpu1, sd) {
+	for_each_pdomain(pcpu(cpu1)->sd, sd) {
 		WARN_ON(!cpu_isset(cpu1, sd->span));
 		if (cpu_isset(cpu2, sd->span))
 			return distance;
@@ -5499,7 +6952,7 @@ static void calibrate_migration_costs(co
 	 */
 	for_each_cpu_mask(cpu, *cpu_map) {
 		distance = 0;
-		for_each_domain(cpu, sd) {
+		for_each_pdomain(pcpu(cpu)->sd, sd) {
 			sd->cache_hot_time = migration_cost[distance];
 			distance++;
 		}
@@ -6153,45 +7606,456 @@ int in_sched_functions(unsigned long add
 		&& addr < (unsigned long)__sched_text_end);
 }
 
-void __init sched_init(void)
+static void init_rq(struct runqueue *rq, int cpu)
+{
+	int j, k;
+	prio_array_t *array;
+
+	spin_lock_init(&rq->lock);
+	rq->nr_running = 0;
+	rq->active = rq->arrays;
+	rq->expired = rq->arrays + 1;
+	rq->best_expired_prio = MAX_PRIO;
+
+#ifdef CONFIG_SMP
+	rq->sd = NULL;
+	for (j = 0; j < 3; j++)
+		rq->cpu_load[j] = 0;
+	rq->active_balance = 0;
+#endif
+	rq->push_cpu = NULL;
+	rq->migration_thread = NULL;
+	INIT_LIST_HEAD(&rq->migration_queue);
+	rq->cpu = cpu;
+	atomic_set(&rq->nr_iowait, 0);
+
+	for (j = 0; j < 2; j++) {
+		array = rq->arrays + j;
+		for (k = 0; k < MAX_PRIO; k++) {
+			INIT_LIST_HEAD(array->queue + k);
+			__clear_bit(k, array->bitmap);
+		}
+		// delimiter for bitsearch
+		__set_bit(MAX_PRIO, array->bitmap);
+	}
+}
+
+static void init_vcpu(vcpu_t vcpu, int id)
+{
+	memset(vcpu, 0, sizeof(struct vcpu_struct));
+	vcpu->id = id;
+	vcpu_last_pcpu(vcpu) = id;
+	init_rq(vcpu_rq(vcpu), id);
+}
+
+#if defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED)
+/* called with rq->lock and fairsched_lock held, see install_vcpu() */
+static void __install_vcpu(struct vcpu_scheduler *vsched, vcpu_t vcpu)
+{
+	int id;
+
+	id = vcpu->id;
+	vcpu->vsched = vsched;
+	vsched->vcpu[id] = vcpu;
+	vcpu_last_pcpu(vcpu) = id;
+	wmb();
+	/* FIXME: probably locking should be reworked, e.g.
+	   we don't have a corresponding rmb(), so we need to update the mask
+	   only after a quiescent state */
+	/* init_boot_vcpu() should be reworked if RCU is used here */
+	list_add(&vcpu->list, &vsched->idle_list);
+	vsched->num_online_vcpus++;
+}
+
+static int install_vcpu(vcpu_t vcpu, struct vcpu_scheduler *vsched)
 {
 	runqueue_t *rq;
-	int i, j, k;
+	unsigned long flags;
+	int res = 0;
 
-	for_each_cpu(i) {
-		prio_array_t *array;
+	rq = vcpu_rq(vcpu);
+	spin_lock_irqsave(&rq->lock, flags);
+	spin_lock(&fairsched_lock);
 
-		rq = cpu_rq(i);
-		spin_lock_init(&rq->lock);
-		rq->nr_running = 0;
-		rq->active = rq->arrays;
-		rq->expired = rq->arrays + 1;
-		rq->best_expired_prio = MAX_PRIO;
+	if (vsched->vcpu[vcpu->id] != NULL)
+		res = -EBUSY;
+	else
+		__install_vcpu(vsched, vcpu);
 
-#ifdef CONFIG_SMP
-		rq->sd = NULL;
-		for (j = 1; j < 3; j++)
-			rq->cpu_load[j] = 0;
-		rq->active_balance = 0;
-		rq->push_cpu = 0;
-		rq->cpu = i;
-		rq->migration_thread = NULL;
-		INIT_LIST_HEAD(&rq->migration_queue);
-		rq->cpu = i;
-#endif
-		atomic_set(&rq->nr_iowait, 0);
-
-		for (j = 0; j < 2; j++) {
-			array = rq->arrays + j;
-			for (k = 0; k < MAX_PRIO; k++) {
-				INIT_LIST_HEAD(array->queue + k);
-				__clear_bit(k, array->bitmap);
-			}
-			// delimiter for bitsearch
-			__set_bit(MAX_PRIO, array->bitmap);
+	spin_unlock(&fairsched_lock);
+	spin_unlock_irqrestore(&rq->lock, flags);
+	return res;
+}
+
+static int __add_vcpu(struct vcpu_scheduler *vsched, int id)
+{
+	vcpu_t vcpu;
+	int res;
+
+	res = -ENOMEM;
+	vcpu = kmalloc(sizeof(struct vcpu_struct), GFP_KERNEL);
+	if (vcpu == NULL)
+		goto out;
+
+	init_vcpu(vcpu, id);
+	vcpu_rq(vcpu)->curr = this_pcpu()->idle;
+	res = install_vcpu(vcpu, vsched);
+	if (res < 0)
+		goto out_free;
+	return 0;
+
+out_free:
+	kfree(vcpu);
+out:
+	return res;
+}
+
+void vsched_init(struct vcpu_scheduler *vsched, int id)
+{
+	memset(vsched, 0, sizeof(*vsched));
+
+	INIT_LIST_HEAD(&vsched->idle_list);
+	INIT_LIST_HEAD(&vsched->active_list);
+	INIT_LIST_HEAD(&vsched->running_list);
+	vsched->num_online_vcpus = 0;
+	vsched->vcpu_online_map = CPU_MASK_NONE;
+	vsched->vcpu_running_map = CPU_MASK_NONE;
+	vsched->pcpu_running_map = CPU_MASK_NONE;
+	vsched->id = id;
+
+	spin_lock(&vsched_list_lock);
+	list_add(&vsched->list, &vsched_list);
+	spin_unlock(&vsched_list_lock);
+}
+
+#ifdef CONFIG_FAIRSCHED
+int scale_vcpu_frequency = 1;
+EXPORT_SYMBOL(scale_vcpu_frequency);
+
+unsigned long ve_scale_khz(unsigned long khz)
+{
+	struct fairsched_node *node;
+	int cpus;
+	unsigned long rate;
+
+	if (!scale_vcpu_frequency)
+		return khz;
+
+	rate = fairsched_nr_cpus << FSCHRATE_SHIFT;
+
+	/*
+	 * Ideally fairsched node should be taken from the current ve_struct.
+	 * However, to simplify the code and locking, it is taken from current
+	 * (currently fairsched_node can be changed only for a sleeping task).
+	 * That means that VE0 processes moved to some special node will get
+	 * fake CPU speed, but that shouldn't be a big problem.
+	 */
+	preempt_disable();
+	node = current->vsched->node;
+	cpus = node->vcpus;
+	if (node->rate_limited)
+		rate = node->rate;
+	preempt_enable();
+
+	return min((unsigned long long)khz,
+		((unsigned long long)khz * (rate / cpus)) >> FSCHRATE_SHIFT);
+}
+
+/* No locks supposed to be held */
+static void vsched_del_vcpu(vcpu_t vcpu, int empty);
+static int vsched_add_vcpu(struct vcpu_scheduler *vsched)
+{
+	int res, err;
+	vcpu_t vcpu;
+	int id;
+	static DECLARE_MUTEX(id_mutex);
+
+	down(&id_mutex);
+	id = find_first_zero_bit(vsched->vcpu_online_map.bits, NR_CPUS);
+	if (id >= NR_CPUS) {
+		err = -EBUSY;
+		goto out_up;
+	}
+
+	err = __add_vcpu(vsched, id);
+	if (err < 0)
+		goto out_up;
+	memset(VE_CPU_STATS(vsched->node->owner_env, id), 0,
+			sizeof(struct ve_cpu_stats));
+	/* Kick idle time collecting logic */
+	ve_strt_idle(vsched->node->owner_env, id, get_cycles());
+
+	vcpu = vsched_vcpu(vsched, id);
+	err = -ENOMEM;
+
+	res = vmigration_call(&migration_notifier, CPU_UP_PREPARE, vcpu);
+	if (res != NOTIFY_OK)
+		goto out_del_up;
+
+	res = vmigration_call(&migration_notifier, CPU_ONLINE, vcpu);
+	if (res != NOTIFY_OK)
+		goto out_cancel_del_up;
+
+	err = 0;
+
+out_up:
+	up(&id_mutex);
+	return err;
+
+out_cancel_del_up:
+	vmigration_call(&migration_notifier, CPU_UP_CANCELED, vcpu);
+out_del_up:
+	vsched_del_vcpu(vcpu, 0);
+	goto out_up;
+}
+
+static void vsched_del_vcpu(vcpu_t vcpu, int empty)
+{
+	struct vcpu_scheduler *vsched;
+	runqueue_t *rq;
+
+	vsched = vcpu_vsched(vcpu);
+	rq = vcpu_rq(vcpu);
+
+	spin_lock_irq(&rq->lock);
+	spin_lock(&fairsched_lock);
+	cpu_clear(vcpu->id, vsched->vcpu_online_map);
+	vsched->num_online_vcpus--;
+	spin_unlock(&fairsched_lock);
+	spin_unlock_irq(&rq->lock);
+
+	/* no need to synchronize if there are no tasks at all */
+	if (!empty)
+		synchronize_kernel();
+
+	/*
+	 * FIXME: ideas for VCPU hotplug:
+	 *
+	 * - push_cpu should be checked/cleaned up
+	 * - serialization
+	 */
+
+	/*
+	 * all tasks should migrate from this VCPU somewhere;
+	 * also, from this moment the VCPU is offline, so migration_thread
+	 * won't accept any new tasks...
+	 */
+	vmigration_call(&migration_notifier, CPU_DEAD, vcpu);
+	BUG_ON(rq->nr_running != 0);
+
+	/* vcpu_put() is called after deactivate_task. This loop makes sure
+	 * that vcpu_put() has finished and the vcpu can be freed */
+	while ((volatile int)vcpu->running)
+		yield();
+
+	BUG_ON(vcpu->active);	/* should be in idle_list */
+	BUG_ON(vcpu_rq(vcpu)->prev_mm != NULL);
+
+	spin_lock_irq(&fairsched_lock);
+	list_del(&vcpu->list);
+	vsched_vcpu(vsched, vcpu->id) = NULL;
+	spin_unlock_irq(&fairsched_lock);
+
+	kfree(vcpu);
+}
+
+int vsched_set_vcpus(struct vcpu_scheduler *vsched, unsigned int vcpus)
+{
+	int i, ret = 0;
+	vcpu_t vcpu;
+
+	if (vsched->num_online_vcpus < vcpus) {
+		/* need to add more VCPUs */
+		for (i = vcpus - vsched->num_online_vcpus; i > 0; i--) {
+			ret = vsched_add_vcpu(vsched);
+			if (ret < 0)
+				break;
 		}
+	} else if (vsched->num_online_vcpus > vcpus) {
+		/* remove some VCPUs */
+		while (vcpus != vsched->num_online_vcpus) {
+			vcpu = vsched_vcpu(vsched, vsched->num_online_vcpus - 1);
+			BUG_ON(!vcpu);
+			vsched_del_vcpu(vcpu, 0);
+		}
+	}
+#ifdef CONFIG_FAIRSCHED
+	vsched->node->vcpus = vsched->num_online_vcpus;
+#endif
+	return ret;
+}
+
+int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched)
+{
+	vcpu_t dest_vcpu;
+	int id;
+
+	id = first_cpu(vsched->vcpu_online_map);
+	if (id >= NR_CPUS)
+		goto err;
+
+	dest_vcpu = vsched_vcpu(vsched, id);
+	set_cpus_allowed(p, CPU_MASK_ALL);
+	sched_migrate_task(p, dest_vcpu);
+
+	if (task_vsched_id(p) != vsched_id(vsched)) {
+		/* race: probably someone changed cpus_allowed? */
+		printk("vsched_mvpr: failed to move task\n");
+		goto err;
 	}
 
+	return 0;
+
+err:
+	return -EINVAL;
+}
+
+void vsched_fairsched_link(struct vcpu_scheduler *vsched,
+		struct fairsched_node *node)
+{
+	vsched->node = node;
+	node->vsched = vsched;
+}
+
+void vsched_fairsched_unlink(struct vcpu_scheduler *vsched,
+		struct fairsched_node *node)
+{
+	vsched->node = NULL;
+	node->vsched = NULL;
+}
+
+int vsched_create(int id, struct fairsched_node *node)
+{
+	struct vcpu_scheduler *vsched;
+	int res, cpus;
+
+	vsched = kmalloc(sizeof(*vsched), GFP_KERNEL);
+	if (vsched == NULL)
+		return -ENOMEM;
+
+	vsched_init(vsched, node->id);
+	vsched_fairsched_link(vsched, node);
+
+	cpus = node->vcpus ? : num_online_cpus();
+	res = vsched_set_vcpus(vsched, cpus);
+	if (res < 0)
+		goto err_add;
+
+	return 0;
+
+err_add:
+	vsched_destroy(vsched);
+	return res;
+}
+
+/* Drop all VCPUs of @vsched, unlink it from its fairsched node and free it.
+ * Returns 0 (also for NULL), or -EBUSY if VCPUs are still online or listed. */
+int vsched_destroy(struct vcpu_scheduler *vsched)
+{
+	if (vsched == NULL)
+		return 0;
+
+	vsched_set_vcpus(vsched, 0);
+
+	spin_lock_irq(&fairsched_lock);
+	if (vsched->num_online_vcpus ||
+	    !list_empty(&vsched->running_list) ||
+	    !list_empty(&vsched->active_list) ||
+	    !list_empty(&vsched->idle_list))
+		goto err_busy;
+
+	vsched_fairsched_unlink(vsched, vsched->node);
+	spin_unlock_irq(&fairsched_lock);
+
+	spin_lock(&vsched_list_lock);
+	list_del(&vsched->list);
+	spin_unlock(&vsched_list_lock);
+
+	kfree(vsched);
+	return 0;
+
+err_busy:
+	oops_in_progress = 1;
+	printk(KERN_ERR "BUG in vsched_destroy, id %d: n%d r%d a%d i%d\n",
+			vsched->id,
+			vsched->num_online_vcpus,
+			!list_empty(&vsched->running_list),
+			!list_empty(&vsched->active_list),
+			!list_empty(&vsched->idle_list));
+	spin_unlock_irq(&fairsched_lock);
+	oops_in_progress = 0;
+	return -EBUSY;
+}
+#endif /* defined(CONFIG_FAIRSCHED) */
+
+static void init_boot_vcpu(void)
+{
+	int res;
+
+	/*
+	 * We set up boot_vcpu and its runqueue for use until init_idle()
+	 * happens on cpu0. This is required since timer interrupts can
+	 * happen between sched_init() and init_idle().
+	 */
+	init_vcpu(&boot_idle_vcpu, raw_smp_processor_id());
+	vcpu_rq(&boot_idle_vcpu)->curr = current;
+	res = install_vcpu(&boot_idle_vcpu, &idle_vsched);
+	if (res < 0)
+		panic("Can't install boot idle vcpu");
+
+	init_vcpu(&boot_vcpu, raw_smp_processor_id());
+	vcpu_rq(&boot_vcpu)->curr = current;
+	res = install_vcpu(&boot_vcpu, &default_vsched);
+	if (res < 0)
+		panic("Can't install boot vcpu");
+
+	cpu_set(boot_vcpu.id, default_vsched.vcpu_online_map);
+
+	this_pcpu()->vcpu = &boot_idle_vcpu;
+	this_pcpu()->vsched = &idle_vsched;
+}
+#endif /* defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED) */
+
+static void init_pcpu(int id)
+{
+	struct pcpu_info *pcpu;
+
+	pcpu = pcpu(id);
+	pcpu->id = id;
+#ifdef CONFIG_SMP
+	pcpu->sd = NULL;
+#endif
+
+#ifndef CONFIG_SCHED_VCPU
+	init_vcpu(vcpu(id), id);
+#endif
+}
+
+static void init_pcpus(void)
+{
+	int i;
+	for (i = 0; i < NR_CPUS; i++)
+		init_pcpu(i);
+}
+
+void __init sched_init(void)
+{
+	init_pcpus();
+#if defined(CONFIG_SCHED_VCPU)
+	vsched_init(&idle_vsched, -1);
+	vsched_init(&default_vsched, 0);
+#if defined(CONFIG_FAIRSCHED)
+	fairsched_init_early();
+	vsched_fairsched_link(&idle_vsched, &fairsched_idle_node);
+	vsched_fairsched_link(&default_vsched, &fairsched_init_node);
+#endif
+	init_boot_vcpu();
+#else
+#if defined(CONFIG_FAIRSCHED)
+	fairsched_init_early();
+#endif
+#endif
+
 	/*
 	 * The boot idle thread does lazy MMU switching as well:
 	 */
@@ -6207,6 +8071,155 @@ void __init sched_init(void)
 	init_idle(current, smp_processor_id());
 }
 
+#ifdef CONFIG_SCHED_VCPU
+static void show_vcpu_list(struct vcpu_scheduler *vsched, struct list_head *lh)
+{
+	cpumask_t m;
+	vcpu_t vcpu;
+	int i;
+
+	cpus_clear(m);
+	list_for_each_entry(vcpu, lh, list)
+		cpu_set(vcpu->id, m);
+
+	for (i = 0; i < NR_CPUS; i++)
+		if (cpu_isset(i, m))
+			printk("%d ", i);
+}
+
+#define PRINT(s, sz, fmt...)				\
+	do {						\
+		int __out;				\
+		__out = scnprintf(*s, *sz, fmt);	\
+		*s += __out;				\
+		*sz -= __out;				\
+	} while(0)
+
+/* Print @array's non-empty priority lists into *s, with @header first */
+static void show_rq_array(prio_array_t *array, char *header, char **s, int *sz)
+{
+	struct list_head *list;
+	task_t *p;
+	int k, h = 0;
+
+	for (k = 0; k < MAX_PRIO; k++) {
+		list = array->queue + k;
+		if (list_empty(list))
+			continue;
+
+		if (!h) {
+			PRINT(s, sz, "%s", header);
+			h = 1;
+		}
+
+		PRINT(s, sz, " prio %d (", k);
+		list_for_each_entry(p, list, run_list)
+			PRINT(s, sz, "%s[%d] ", p->comm, p->pid);
+		PRINT(s, sz, ")");
+	}
+	if (h)
+		PRINT(s, sz, "\n");
+}
+
+static void show_vcpu(vcpu_t vcpu)
+{
+	runqueue_t *rq;
+	char buf[1024], *s;
+	unsigned long flags;
+	int sz;
+	unsigned long nr_running, cpu_load[3];
+	unsigned long long nr_switches;
+	struct sched_domain *sd;
+	struct task_struct *curr;
+
+	if (vcpu == NULL)
+		return;
+
+	printk("  vcpu %d: last_pcpu %d, state %s%s\n",
+			vcpu->id, vcpu->last_pcpu,
+			vcpu->active ? "A" : "",
+			vcpu->running ? "R" : "");
+	s = buf;
+	sz = sizeof(buf) - 1;
+
+	rq = vcpu_rq(vcpu);
+	spin_lock_irqsave(&rq->lock, flags);
+	nr_running = rq->nr_running;
+#ifdef CONFIG_SMP
+	cpu_load[0] = rq->cpu_load[0];
+	cpu_load[1] = rq->cpu_load[1];
+	cpu_load[2] = rq->cpu_load[2];
+	sd = rq->sd;
+#else
+	cpu_load[0] = cpu_load[1] = cpu_load[2] = 0;
+	sd = NULL;
+#endif
+	nr_switches = rq->nr_switches;
+	curr = rq->curr;
+
+	show_rq_array(rq->active, "      active:", &s, &sz);
+	show_rq_array(rq->expired, "      expired:", &s, &sz);
+	spin_unlock_irqrestore(&rq->lock, flags);
+	*s = 0;
+
+	printk("    rq: running %lu, load {%lu,%lu,%lu}, sw %Lu, sd %p, curr %p\n",
+		nr_running, cpu_load[0], cpu_load[1], cpu_load[2], nr_switches,
+		sd, curr);
+
+	printk("%s", buf);
+}
+
+static inline void fairsched_show_node(struct vcpu_scheduler *vsched)
+{
+#ifdef CONFIG_FAIRSCHED
+	struct fairsched_node *node;
+
+	node = vsched->node;
+	printk("fsnode: ready %d run %d cpu %d vsched %p, pcpu %d\n",
+			node->nr_ready, node->nr_runnable, node->nr_pcpu,
+			node->vsched, smp_processor_id());
+#endif
+}
+
+static void __show_vsched(struct vcpu_scheduler *vsched)
+{
+	char mask[NR_CPUS + 1];
+	int i;
+
+	spin_lock(&fairsched_lock);
+	printk("vsched id=%d\n", vsched_id(vsched));
+	fairsched_show_node(vsched);
+
+	printk("  idle cpus ");
+	show_vcpu_list(vsched, &vsched->idle_list);
+	printk("; active cpus ");
+	show_vcpu_list(vsched, &vsched->active_list);
+	printk("; running cpus ");
+	show_vcpu_list(vsched, &vsched->running_list);
+	printk("\n");
+
+	cpumask_scnprintf(mask, NR_CPUS, vsched->vcpu_online_map);
+	printk("  num_online_cpus=%d, mask=%s (w=%d)\n",
+			vsched->num_online_vcpus, mask,
+			cpus_weight(vsched->vcpu_online_map));
+	spin_unlock(&fairsched_lock);
+	/* show_vcpu() takes rq->lock, which nests outside fairsched_lock */
+	for (i = 0; i < NR_CPUS; i++)
+		show_vcpu(vsched->vcpu[i]);
+}
+
+void show_vsched(void)
+{
+	struct vcpu_scheduler *vsched;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vsched_list_lock, flags);
+	list_for_each_entry (vsched, &vsched_list, list)
+		__show_vsched(vsched);
+	spin_unlock_irqrestore(&vsched_list_lock, flags);
+}
+#endif /* CONFIG_SCHED_VCPU */
+
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 void __might_sleep(char *file, int line)
 {
@@ -6238,7 +8251,7 @@ void normalize_rt_tasks(void)
 	runqueue_t *rq;
 
 	read_lock_irq(&tasklist_lock);
-	for_each_process (p) {
+	for_each_process_all (p) {
 		if (!rt_task(p))
 			continue;
 
@@ -6279,7 +8292,7 @@ void normalize_rt_tasks(void)
  */
 task_t *curr_task(int cpu)
 {
-	return cpu_curr(cpu);
+	return vcpu_rq(pcpu(cpu)->vcpu)->curr;
 }
 
 /**
@@ -6299,7 +8312,7 @@ task_t *curr_task(int cpu)
  */
 void set_curr_task(int cpu, task_t *p)
 {
-	cpu_curr(cpu) = p;
+	vcpu_rq(pcpu(cpu)->vcpu)->curr = p;
 }
 
 #endif
@@ -6360,12 +8373,12 @@ kdb_runqueue(unsigned long cpu, kdb_prin
 {
 	struct runqueue *rq;
 
-	rq = cpu_rq(cpu);
+	rq = vcpu_rq(vsched_vcpu(this_vsched(), cpu));
 
 	xxx_printf("CPU%ld lock:%s curr:0x%p(%d)(%s)",
 		   cpu, (spin_is_locked(&rq->lock))?"LOCKED":"free",
 		   rq->curr, rq->curr->pid, rq->curr->comm);
-	if (rq->curr == rq->idle)
+	if (rq->curr == this_pcpu()->idle)
 		xxx_printf(" is idle");
 	xxx_printf("\n ");
 #ifdef CONFIG_SMP
diff -upr linux-2.6.16.46-0.12.orig/kernel/signal.c linux-2.6.16.46-0.12-027test011/kernel/signal.c
--- linux-2.6.16.46-0.12.orig/kernel/signal.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/signal.c	2007-08-28 17:35:33.000000000 +0400
@@ -26,6 +26,8 @@
 #include <linux/signal.h>
 #include <linux/capability.h>
 #include <linux/delayacct.h>
+#include <linux/kmem_cache.h>
+#include <ub/ub_misc.h>
 #include <asm/param.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -36,7 +38,8 @@
  * SLAB caches for signal bits.
  */
 
-static kmem_cache_t *sigqueue_cachep;
+kmem_cache_t *sigqueue_cachep;
+EXPORT_SYMBOL_GPL(sigqueue_cachep);
 
 /*
  * In POSIX a signal is sent either to a specific thread (Linux task)
@@ -156,6 +159,23 @@ static kmem_cache_t *sigqueue_cachep;
 	(!T(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \
 	 (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL)
 
+static int sig_ve_ignored(int sig, struct siginfo *info, struct task_struct *t)
+{
+	struct ve_struct *ve;
+
+	/* always allow signals from the kernel */
+	if (info == SEND_SIG_FORCED ||
+		       (!is_si_special(info) && SI_FROMKERNEL(info)))
+		return 0;
+
+	ve = current->ve_task_info.owner_env;
+	if (ve->init_entry != t)
+		return 0;
+	if (ve_is_super(get_exec_env()))
+		return 0;
+	return !sig_user_defined(t, sig) || sig_kernel_only(sig);
+}
+
 static int sig_ignored(struct task_struct *t, int sig)
 {
 	void __user * handler;
@@ -222,6 +242,7 @@ fastcall void recalc_sigpending_tsk(stru
 	else
 		clear_tsk_thread_flag(t, TIF_SIGPENDING);
 }
+EXPORT_SYMBOL_GPL(recalc_sigpending_tsk);
 
 void recalc_sigpending(void)
 {
@@ -272,8 +293,13 @@ static struct sigqueue *__sigqueue_alloc
 	atomic_inc(&t->user->sigpending);
 	if (override_rlimit ||
 	    atomic_read(&t->user->sigpending) <=
-			t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
+			t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) {
 		q = kmem_cache_alloc(sigqueue_cachep, flags);
+		if (q && ub_siginfo_charge(q, get_task_ub(t))) {
+			kmem_cache_free(sigqueue_cachep, q);
+			q = NULL;
+		}
+	}
 	if (unlikely(q == NULL)) {
 		atomic_dec(&t->user->sigpending);
 	} else {
@@ -290,6 +316,7 @@ static void __sigqueue_free(struct sigqu
 		return;
 	atomic_dec(&q->user->sigpending);
 	free_uid(q->user);
+	ub_siginfo_uncharge(q);
 	kmem_cache_free(sigqueue_cachep, q);
 }
 
@@ -526,7 +553,16 @@ static int __dequeue_signal(struct sigpe
 {
 	int sig = 0;
 
-	sig = next_signal(pending, mask);
+	/* SIGKILL must have priority, otherwise it is quite easy
+	 * to create an unkillable process, sending sig < SIGKILL
+	 * to self */
+	if (unlikely(sigismember(&pending->signal, SIGKILL))) {
+		if (!sigismember(mask, SIGKILL))
+			sig = SIGKILL;
+	}
+
+	if (likely(!sig))
+		sig = next_signal(pending, mask);
 	if (sig) {
 		if (current->notifier) {
 			if (sigismember(current->notifier_mask, sig)) {
@@ -620,6 +656,7 @@ void signal_wake_up(struct task_struct *
 	if (!wake_up_state(t, mask))
 		kick_process(t);
 }
+EXPORT_SYMBOL_GPL(signal_wake_up);
 
 /*
  * Remove signals in mask from the pending set and queue.
@@ -840,7 +877,7 @@ static int send_signal(int sig, struct s
 			q->info.si_signo = sig;
 			q->info.si_errno = 0;
 			q->info.si_code = SI_USER;
-			q->info.si_pid = current->pid;
+			q->info.si_pid = virt_pid(current);
 			q->info.si_uid = current->uid;
 			break;
 		case (unsigned long) SEND_SIG_PRIV:
@@ -1140,7 +1177,8 @@ retry:
 			spin_unlock_irqrestore(&sp->siglock, flags);
 			return -ESRCH;
 		}
-		ret = __group_send_sig_info(sig, info, p);
+		ret = sig_ve_ignored(sig, info, p) ? 0 :
+			__group_send_sig_info(sig, info, p);
 		spin_unlock_irqrestore(&sp->siglock, flags);
 	}
 
@@ -1160,13 +1198,18 @@ int __kill_pg_info(int sig, struct sigin
 	if (pgrp <= 0)
 		return -EINVAL;
 
+	/* Use __vpid_to_pid(). This function is used under write_lock
+	 * tasklist_lock. */
+	if (is_virtual_pid(pgrp))
+		pgrp = __vpid_to_pid(pgrp);
+
 	success = 0;
 	retval = -ESRCH;
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
 		int err = group_send_sig_info(sig, info, p);
 		success |= !err;
 		retval = err;
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
 	return success ? 0 : retval;
 }
 
@@ -1194,7 +1237,7 @@ kill_proc_info(int sig, struct siginfo *
 		read_lock(&tasklist_lock);
 		acquired_tasklist_lock = 1;
 	}
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	error = -ESRCH;
 	if (p)
 		error = group_send_sig_info(sig, info, p);
@@ -1215,7 +1258,7 @@ int kill_proc_info_as_uid(int sig, struc
 		return ret;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p) {
 		ret = -ESRCH;
 		goto out_unlock;
@@ -1254,8 +1297,8 @@ static int kill_something_info(int sig, 
 		struct task_struct * p;
 
 		read_lock(&tasklist_lock);
-		for_each_process(p) {
-			if (p->pid > 1 && p->tgid != current->tgid) {
+		for_each_process_ve(p) {
+			if (virt_pid(p) > 1 && p->tgid != current->tgid) {
 				int err = group_send_sig_info(sig, info, p);
 				++count;
 				if (err != -EPERM)
@@ -1563,9 +1606,17 @@ void do_notify_parent(struct task_struct
 	BUG_ON(!tsk->ptrace &&
 	       (tsk->group_leader != tsk || !thread_group_empty(tsk)));
 
+#ifdef CONFIG_VE
+	/* Allow to send only SIGCHLD from VE */
+	if (sig != SIGCHLD &&
+			tsk->ve_task_info.owner_env !=
+			tsk->parent->ve_task_info.owner_env)
+		sig = SIGCHLD;
+#endif
+
 	info.si_signo = sig;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	info.si_pid = get_task_pid_ve(tsk, tsk->parent->ve_task_info.owner_env);
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1630,7 +1681,7 @@ static void do_notify_parent_cldstop(str
 
 	info.si_signo = SIGCHLD;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(parent)->owner_env);
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1688,6 +1739,7 @@ static void ptrace_stop(int exit_code, i
 	current->exit_code = exit_code;
 
 	/* Let the debugger run.  */
+	set_pn_state(current, PN_STOP_SIGNAL);
 	set_current_state(TASK_TRACED);
 	spin_unlock_irq(&current->sighand->siglock);
 	try_to_freeze();
@@ -1710,6 +1762,7 @@ static void ptrace_stop(int exit_code, i
 		current->exit_code = nostop_code;
 	}
 
+	clear_pn_state(current);
 	/*
 	 * We are back.  Now reacquire the siglock before touching
 	 * last_siginfo, so that we are sure to have synchronized with
@@ -1765,7 +1818,9 @@ finish_stop(int stop_count)
 	read_unlock(&tasklist_lock);
 
 out:
+	set_stop_state(current);
 	schedule();
+	clear_stop_state(current);
 	/*
 	 * Now we don't run again until continued.
 	 */
@@ -1918,6 +1973,40 @@ static int handle_group_stop(void)
 	return 1;
 }
 
+atomic_t global_suspend = ATOMIC_INIT(0);
+
+/* Refrigerator is place where frozen processes are stored :-). */
+void refrigerator(void)
+{
+	/* Hmm, should we be allowed to suspend when there are realtime
+	   processes around? */
+	long save;
+	save = current->state;
+	current->state = TASK_UNINTERRUPTIBLE;
+	/* printk("="); */
+
+	spin_lock_irq(&current->sighand->siglock);
+	if (test_and_clear_thread_flag(TIF_FREEZE)) {
+		recalc_sigpending(); /* We sent fake signal, clean it up */
+		if (atomic_read(&global_suspend) ||
+		    atomic_read(&get_exec_env()->suspend)) {
+			current->flags |= PF_FROZEN;
+		} else {
+			current->state = save;
+		}
+	} else {
+		/* Freeze request could be canceled before we entered
+		 * refrigerator(). In this case we do nothing. */
+		current->state = save;
+	}
+	spin_unlock_irq(&current->sighand->siglock);
+
+	while (current->flags & PF_FROZEN)
+		schedule();
+	current->state = save;
+}
+EXPORT_SYMBOL(refrigerator);
+
 int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
 			  struct pt_regs *regs, void *cookie)
 {
@@ -1959,7 +2048,7 @@ relock:
 				info->si_signo = signr;
 				info->si_errno = 0;
 				info->si_code = SI_USER;
-				info->si_pid = current->parent->pid;
+				info->si_pid = virt_pid(current->parent);
 				info->si_uid = current->parent->uid;
 			}
 
@@ -2310,7 +2399,6 @@ sys_rt_sigtimedwait(const sigset_t __use
 
 			timeout = schedule_timeout_interruptible(timeout);
 
-			try_to_freeze();
 			spin_lock_irq(&current->sighand->siglock);
 			sig = dequeue_signal(current, &these, &info);
 			current->blocked = current->real_blocked;
@@ -2343,7 +2431,7 @@ sys_kill(int pid, int sig)
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_USER;
-	info.si_pid = current->tgid;
+	info.si_pid = virt_tgid(current);
 	info.si_uid = current->uid;
 
 	return kill_something_info(sig, &info, pid);
@@ -2359,12 +2447,12 @@ static int do_tkill(int tgid, int pid, i
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_TKILL;
-	info.si_pid = current->tgid;
+	info.si_pid = virt_tgid(current);
 	info.si_uid = current->uid;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
-	if (p && (tgid <= 0 || p->tgid == tgid)) {
+	p = find_task_by_pid_ve(pid);
+	if (p && (tgid <= 0 || virt_tgid(p) == tgid)) {
 		error = check_kill_permission(sig, &info, p);
 		/*
 		 * The null signal is a permissions and process existence
@@ -2372,8 +2460,10 @@ static int do_tkill(int tgid, int pid, i
 		 */
 		if (!error && sig && p->sighand) {
 			spin_lock_irq(&p->sighand->siglock);
-			handle_stop_signal(sig, p);
-			error = specific_send_sig_info(sig, &info, p);
+			if (!sig_ve_ignored(sig, &info, p)) {
+				handle_stop_signal(sig, p);
+				error = specific_send_sig_info(sig, &info, p);
+			}
 			spin_unlock_irq(&p->sighand->siglock);
 		}
 	}
@@ -2755,7 +2845,7 @@ void __init signals_init(void)
 		kmem_cache_create("sigqueue",
 				  sizeof(struct sigqueue),
 				  __alignof__(struct sigqueue),
-				  SLAB_PANIC, NULL, NULL);
+				  SLAB_PANIC|SLAB_UBC, NULL, NULL);
 }
 
 #ifdef CONFIG_KDB
diff -upr linux-2.6.16.46-0.12.orig/kernel/softirq.c linux-2.6.16.46-0.12-027test011/kernel/softirq.c
--- linux-2.6.16.46-0.12.orig/kernel/softirq.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/softirq.c	2007-08-28 17:35:31.000000000 +0400
@@ -13,10 +13,13 @@
 #include <linux/mm.h>
 #include <linux/notifier.h>
 #include <linux/percpu.h>
+#include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/kthread.h>
 #include <linux/rcupdate.h>
 
+#include <ub/beancounter.h>
+
 #include <asm/irq.h>
 /*
    - No shared variables, all the data are CPU local.
@@ -44,6 +47,8 @@ EXPORT_SYMBOL(irq_stat);
 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
 
 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+static DEFINE_PER_CPU(struct task_struct *, ksoftirqd_wakeup);
+static int ksoftirqd_stat[NR_CPUS];
 
 /*
  * we cannot loop indefinitely here to avoid userspace starvation,
@@ -54,7 +59,7 @@ static DEFINE_PER_CPU(struct task_struct
 static inline void wakeup_softirqd(void)
 {
 	/* Interrupts are disabled: no need to stop preemption */
-	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
+	struct task_struct *tsk = __get_cpu_var(ksoftirqd_wakeup);
 
 	if (tsk && tsk->state != TASK_RUNNING)
 		wake_up_process(tsk);
@@ -73,10 +78,14 @@ static inline void wakeup_softirqd(void)
 
 asmlinkage void __do_softirq(void)
 {
+	struct user_beancounter *ub;
 	struct softirq_action *h;
 	__u32 pending;
 	int max_restart = MAX_SOFTIRQ_RESTART;
 	int cpu;
+	struct ve_struct *envid;
+
+	envid = set_exec_env(get_ve0());
 
 	pending = local_softirq_pending();
 
@@ -90,6 +99,7 @@ restart:
 
 	h = softirq_vec;
 
+	ub = set_exec_ub(get_ub0());
 	do {
 		if (pending & 1) {
 			h->action(h);
@@ -98,6 +108,7 @@ restart:
 		h++;
 		pending >>= 1;
 	} while (pending);
+	(void)set_exec_ub(ub);
 
 	local_irq_disable();
 
@@ -108,6 +119,7 @@ restart:
 	if (pending)
 		wakeup_softirqd();
 
+	(void)set_exec_env(envid);
 	__local_bh_enable();
 }
 
@@ -164,6 +176,7 @@ EXPORT_SYMBOL(local_bh_enable);
 void irq_exit(void)
 {
 	account_system_vtime(current);
+	restore_context();
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
@@ -513,6 +526,52 @@ static int __devinit cpu_callback(struct
 	return NOTIFY_OK;
 }
 
+static int proc_ksoftirqd(ctl_table *ctl, int write, struct file *filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret, cpu;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+	if (!write)
+		return ret;
+
+	for_each_online_cpu(cpu) {
+		per_cpu(ksoftirqd_wakeup, cpu) =
+			ksoftirqd_stat[cpu] ? per_cpu(ksoftirqd, cpu) : NULL;
+	}
+	return ret;
+}
+
+static int sysctl_ksoftirqd(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context)
+{
+	return -EINVAL;
+}
+
+static ctl_table debug_table[] = {
+	{
+		.ctl_name	= 1246,
+		.procname	= "ksoftirqd",
+		.data		= ksoftirqd_stat,
+		.maxlen		= sizeof(ksoftirqd_stat),
+		.mode		= 0644,
+		.proc_handler	= &proc_ksoftirqd,
+		.strategy	= &sysctl_ksoftirqd
+	},
+	{0}
+};
+
+static ctl_table root_table[] = {
+	{
+		.ctl_name	= CTL_DEBUG,
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table
+	},
+	{0}
+};
+
 static struct notifier_block __devinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
@@ -523,5 +582,6 @@ __init int spawn_ksoftirqd(void)
 	cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 	register_cpu_notifier(&cpu_nfb);
+	register_sysctl_table(root_table, 0);
 	return 0;
 }
diff -upr linux-2.6.16.46-0.12.orig/kernel/stop_machine.c linux-2.6.16.46-0.12-027test011/kernel/stop_machine.c
--- linux-2.6.16.46-0.12.orig/kernel/stop_machine.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/stop_machine.c	2007-08-28 17:35:34.000000000 +0400
@@ -4,6 +4,7 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/syscalls.h>
+#include <linux/delay.h>
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
@@ -56,7 +57,7 @@ static int stopmachine(void *cpu)
 		/* Yield in first stage: migration threads need to
 		 * help our sisters onto their CPUs. */
 		if (!prepared && !irqs_disabled)
-			yield();
+			msleep(10);
 		else
 			cpu_relax();
 	}
@@ -96,7 +97,7 @@ static int stop_machine(void)
 	stopmachine_state = STOPMACHINE_WAIT;
 
 	for_each_online_cpu(i) {
-		if (i == raw_smp_processor_id())
+		if (i == task_cpu(current))
 			continue;
 		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
 		if (ret < 0)
@@ -106,7 +107,7 @@ static int stop_machine(void)
 
 	/* Wait for them all to come to life. */
 	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
-		yield();
+		msleep(10);
 
 	/* If some failed, kill them all. */
 	if (ret < 0) {
@@ -177,7 +178,7 @@ struct task_struct *__stop_machine_run(i
 
 	/* If they don't care which CPU fn runs on, bind to any online one. */
 	if (cpu == NR_CPUS)
-		cpu = raw_smp_processor_id();
+		cpu = task_cpu(current);
 
 	p = kthread_create(do_stop, &smdata, "kstopmachine");
 	if (!IS_ERR(p)) {
diff -upr linux-2.6.16.46-0.12.orig/kernel/sys.c linux-2.6.16.46-0.12-027test011/kernel/sys.c
--- linux-2.6.16.46-0.12.orig/kernel/sys.c	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/sys.c	2007-08-28 17:35:33.000000000 +0400
@@ -11,6 +11,7 @@
 #include <linux/mman.h>
 #include <linux/smp_lock.h>
 #include <linux/notifier.h>
+#include <linux/virtinfo.h>
 #include <linux/reboot.h>
 #include <linux/prctl.h>
 #include <linux/init.h>
@@ -242,6 +243,102 @@ int capable(int cap)
 EXPORT_SYMBOL(capable);
 #endif
 
+DECLARE_MUTEX(virtinfo_sem);
+EXPORT_SYMBOL(virtinfo_sem);
+static struct vnotifier_block *virtinfo_chain[VIRT_TYPES];
+
+void __virtinfo_notifier_register(int type, struct vnotifier_block *nb)
+{
+	struct vnotifier_block **link = &virtinfo_chain[type];
+
+	/* Chain is kept in descending priority; skip the higher ones. */
+	while (*link != NULL && nb->priority < (*link)->priority)
+		link = &(*link)->next;
+	nb->next = *link;
+	smp_wmb();	/* nb->next must be visible before nb is linked in */
+	*link = nb;
+}
+
+EXPORT_SYMBOL(__virtinfo_notifier_register);
+
+void virtinfo_notifier_register(int type, struct vnotifier_block *nb)
+{
+	down(&virtinfo_sem);	/* unregister takes it too */
+	__virtinfo_notifier_register(type, nb);
+	up(&virtinfo_sem);
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_register);
+
+struct virtinfo_cnt_struct {
+	volatile unsigned long exit[NR_CPUS];	/* calls ended, per exit CPU */
+	volatile unsigned long entry;		/* calls begun on this CPU */
+};
+static DEFINE_PER_CPU(struct virtinfo_cnt_struct, virtcnt);
+
+void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb)
+{
+	struct vnotifier_block **p;
+	int entry_cpu, exit_cpu;
+	unsigned long cnt;
+
+	down(&virtinfo_sem);
+	for (p = &virtinfo_chain[type]; *p && *p != nb; p = &(*p)->next);
+	if (*p != NULL)		/* nb might not be on this chain at all */
+		*p = nb->next;
+	smp_mb();
+	/* Wait until every call counted in on a CPU is also counted out. */
+	for_each_cpu_mask(entry_cpu, cpu_possible_map) {
+		while (1) {
+			cnt = 0;
+			for_each_cpu_mask(exit_cpu, cpu_possible_map)
+				cnt +=
+				    per_cpu(virtcnt, entry_cpu).exit[exit_cpu];
+			smp_rmb();
+			if (cnt == per_cpu(virtcnt, entry_cpu).entry)
+				break;
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(HZ / 100);
+		}
+	}
+	up(&virtinfo_sem);
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_unregister);
+
+int virtinfo_notifier_call(int type, unsigned long n, void *data)
+{
+	struct vnotifier_block *nb;
+	int entry_cpu, exit_cpu;
+	int ret = NOTIFY_DONE;
+
+	/* Count this call in on the CPU where it starts ... */
+	entry_cpu = get_cpu();
+	per_cpu(virtcnt, entry_cpu).entry++;
+	smp_wmb();
+	put_cpu();
+
+	/* Each handler receives the previous handler's return value. */
+	for (nb = virtinfo_chain[type]; nb != NULL; nb = nb->next) {
+		ret = nb->notifier_call(nb, n, data, ret);
+		if (ret & NOTIFY_STOP_MASK) {
+			/* Stop here; the stop bit is not returned. */
+			ret &= ~NOTIFY_STOP_MASK;
+			break;
+		}
+	}
+
+	/* ... and count it out against that CPU, wherever it finishes. */
+	exit_cpu = get_cpu();
+	smp_wmb();
+	per_cpu(virtcnt, entry_cpu).exit[exit_cpu]++;
+	put_cpu();
+
+	return ret;
+}
+
+EXPORT_SYMBOL(virtinfo_notifier_call);
+
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
 	int no_nice;
@@ -287,17 +384,19 @@ asmlinkage long sys_setpriority(int whic
 	switch (which) {
 		case PRIO_PROCESS:
 			if (!who)
-				who = current->pid;
-			p = find_task_by_pid(who);
+				who = virt_pid(current);
+			p = find_task_by_pid_ve(who);
 			if (p)
 				error = set_one_prio(p, niceval, error);
 			break;
 		case PRIO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			else
+				who = vpid_to_pid(who);
+			do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
 				error = set_one_prio(p, niceval, error);
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_ve(who, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			user = current->user;
@@ -307,10 +406,10 @@ asmlinkage long sys_setpriority(int whic
 				if ((who != current->uid) && !(user = find_user(who)))
 					goto out_unlock;	/* No processes for this user */
 
-			do_each_thread(g, p)
+			do_each_thread_ve(g, p)
 				if (p->uid == who)
 					error = set_one_prio(p, niceval, error);
-			while_each_thread(g, p);
+			while_each_thread_ve(g, p);
 			if (who != current->uid)
 				free_uid(user);		/* For find_user() */
 			break;
@@ -340,8 +439,8 @@ asmlinkage long sys_getpriority(int whic
 	switch (which) {
 		case PRIO_PROCESS:
 			if (!who)
-				who = current->pid;
-			p = find_task_by_pid(who);
+				who = virt_pid(current);
+			p = find_task_by_pid_ve(who);
 			if (p) {
 				niceval = 20 - task_nice(p);
 				if (niceval > retval)
@@ -351,11 +450,13 @@ asmlinkage long sys_getpriority(int whic
 		case PRIO_PGRP:
 			if (!who)
 				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			else
+				who = vpid_to_pid(who);
+			do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
 				niceval = 20 - task_nice(p);
 				if (niceval > retval)
 					retval = niceval;
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_task_pid_ve(who, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			user = current->user;
@@ -365,13 +466,13 @@ asmlinkage long sys_getpriority(int whic
 				if ((who != current->uid) && !(user = find_user(who)))
 					goto out_unlock;	/* No processes for this user */
 
-			do_each_thread(g, p)
+			do_each_thread_ve(g, p)
 				if (p->uid == who) {
 					niceval = 20 - task_nice(p);
 					if (niceval > retval)
 						retval = niceval;
 				}
-			while_each_thread(g, p);
+			while_each_thread_ve(g, p);
 			if (who != current->uid)
 				free_uid(user);		/* for find_user() */
 			break;
@@ -503,6 +604,24 @@ asmlinkage long sys_reboot(int magic1, i
 	                magic2 != LINUX_REBOOT_MAGIC2C))
 		return -EINVAL;
 
+#ifdef CONFIG_VE
+	if (!ve_is_super(get_exec_env()))
+		switch (cmd) {
+		case LINUX_REBOOT_CMD_RESTART:
+		case LINUX_REBOOT_CMD_HALT:
+		case LINUX_REBOOT_CMD_POWER_OFF:
+		case LINUX_REBOOT_CMD_RESTART2:
+			force_sig(SIGKILL, get_exec_env()->init_entry);
+			/* fall through: these commands also return 0 */
+		case LINUX_REBOOT_CMD_CAD_ON:
+		case LINUX_REBOOT_CMD_CAD_OFF:
+			return 0;
+
+		default:
+			return -EINVAL;
+		}
+#endif
+
 	/* Instead of trying to make the power_off code look like
 	 * halt when pm_power_off is not set do it the easy way.
 	 */
@@ -692,7 +811,7 @@ asmlinkage long sys_setgid(gid_t gid)
 	return 0;
 }
   
-static int set_user(uid_t new_ruid, int dumpclear)
+int set_user(uid_t new_ruid, int dumpclear)
 {
 	struct user_struct *new_user;
 
@@ -702,7 +821,7 @@ static int set_user(uid_t new_ruid, int 
 
 	if (atomic_read(&new_user->processes) >=
 				current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
-			new_user != &root_user) {
+			new_ruid != 0) {
 		free_uid(new_user);
 		return -EAGAIN;
 	}
@@ -717,6 +836,7 @@ static int set_user(uid_t new_ruid, int 
 	current->uid = new_ruid;
 	return 0;
 }
+EXPORT_SYMBOL(set_user);
 
 /*
  * Unprivileged users may change the real uid to the effective uid
@@ -1005,8 +1125,27 @@ asmlinkage long sys_setfsgid(gid_t gid)
 	return old_fsgid;
 }
 
+#ifdef CONFIG_VE
+unsigned long long ve_relative_clock(struct timespec * ts)
+{
+	unsigned long long offset = 0;	/* stays 0 if *ts < start_timespec */
+
+	if (ts->tv_sec > get_exec_env()->start_timespec.tv_sec ||
+	    (ts->tv_sec == get_exec_env()->start_timespec.tv_sec &&
+	     ts->tv_nsec >= get_exec_env()->start_timespec.tv_nsec))
+		offset = (unsigned long long)(ts->tv_sec -
+			get_exec_env()->start_timespec.tv_sec) * NSEC_PER_SEC
+			+ ts->tv_nsec -	get_exec_env()->start_timespec.tv_nsec;
+	return nsec_to_clock_t(offset);
+}
+#endif
+
 asmlinkage long sys_times(struct tms __user * tbuf)
 {
+#ifdef CONFIG_VE
+	struct timespec now;
+#endif
+
 	/*
 	 *	In the SMP world we might just be unlucky and have one of
 	 *	the times increment as we use it. Since the value is an
@@ -1085,7 +1224,13 @@ asmlinkage long sys_times(struct tms __u
 		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 			return -EFAULT;
 	}
+#ifndef CONFIG_VE
 	return (long) jiffies_64_to_clock_t(get_jiffies_64());
+#else
+	/* Compare to calculation in fs/proc/array.c */
+	do_posix_clock_monotonic_gettime(&now);
+	return ve_relative_clock(&now);
+#endif
 }
 
 /*
@@ -1106,21 +1251,24 @@ asmlinkage long sys_setpgid(pid_t pid, p
 	struct task_struct *p;
 	struct task_struct *group_leader = current->group_leader;
 	int err = -EINVAL;
+	int _pgid;
 
 	if (!pid)
-		pid = group_leader->pid;
+		pid = virt_pid(group_leader);
 	if (!pgid)
 		pgid = pid;
 	if (pgid < 0)
 		return -EINVAL;
 
+	_pgid = vpid_to_pid(pgid);
+
 	/* From this point forward we keep holding onto the tasklist lock
 	 * so that our parent does not change from under us. -DaveM
 	 */
 	write_lock_irq(&tasklist_lock);
 
 	err = -ESRCH;
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ve(pid);
 	if (!p)
 		goto out;
 
@@ -1145,25 +1293,35 @@ asmlinkage long sys_setpgid(pid_t pid, p
 	if (p->signal->leader)
 		goto out;
 
-	if (pgid != pid) {
+	pgid = virt_pid(p);
+	if (_pgid != p->pid) {
 		struct task_struct *p;
 
-		do_each_task_pid(pgid, PIDTYPE_PGID, p) {
-			if (p->signal->session == group_leader->signal->session)
+		do_each_task_pid_ve(_pgid, PIDTYPE_PGID, p) {
+			if (p->signal->session == group_leader->signal->session) {
+				pgid = virt_pgid(p);
 				goto ok_pgid;
-		} while_each_task_pid(pgid, PIDTYPE_PGID, p);
+			}
+		} while_each_task_pid_ve(_pgid, PIDTYPE_PGID, p);
 		goto out;
 	}
 
 ok_pgid:
-	err = security_task_setpgid(p, pgid);
+	err = security_task_setpgid(p, _pgid);
 	if (err)
 		goto out;
 
-	if (process_group(p) != pgid) {
+	if (process_group(p) != _pgid) {
 		detach_pid(p, PIDTYPE_PGID);
-		p->signal->pgrp = pgid;
-		attach_pid(p, PIDTYPE_PGID, pgid);
+		p->signal->pgrp = _pgid;
+		set_virt_pgid(p, pgid);
+		attach_pid(p, PIDTYPE_PGID, _pgid);
+		if (atomic_read(&p->signal->count) != 1) {
+			task_t *t;
+			for (t = next_thread(p); t != p; t = next_thread(t)) {
+				set_virt_pgid(t, pgid);
+			}
+		}
 	}
 
 	err = 0;
@@ -1176,19 +1334,19 @@ out:
 asmlinkage long sys_getpgid(pid_t pid)
 {
 	if (!pid) {
-		return process_group(current);
+		return virt_pgid(current);
 	} else {
 		int retval;
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		retval = -ESRCH;
 		if (p) {
 			retval = security_task_getpgid(p);
 			if (!retval)
-				retval = process_group(p);
+				retval = get_task_pgid(p);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1200,7 +1358,7 @@ asmlinkage long sys_getpgid(pid_t pid)
 asmlinkage long sys_getpgrp(void)
 {
 	/* SMP - assuming writes are word atomic this is fine */
-	return process_group(current);
+	return virt_pgid(current);
 }
 
 #endif
@@ -1208,19 +1366,19 @@ asmlinkage long sys_getpgrp(void)
 asmlinkage long sys_getsid(pid_t pid)
 {
 	if (!pid) {
-		return current->signal->session;
+		return virt_sid(current);
 	} else {
 		int retval;
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_pid_ve(pid);
 
 		retval = -ESRCH;
 		if(p) {
 			retval = security_task_getsid(p);
 			if (!retval)
-				retval = p->signal->session;
+				retval = get_task_sid(p);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1242,9 +1400,20 @@ asmlinkage long sys_setsid(void)
 
 	group_leader->signal->leader = 1;
 	__set_special_pids(group_leader->pid, group_leader->pid);
+	set_virt_pgid(group_leader, virt_pid(group_leader));
+	set_virt_sid(group_leader, virt_pid(group_leader));
 	group_leader->signal->tty = NULL;
 	group_leader->signal->tty_old_pgrp = 0;
-	err = process_group(group_leader);
+	if (atomic_read(&group_leader->signal->count) != 1) {
+		task_t *t;
+		for (t = next_thread(group_leader); t != group_leader;
+					t = next_thread(t)) {
+			set_virt_pgid(t, virt_pid(group_leader));
+			set_virt_sid(t, virt_pid(group_leader));
+		}
+	}
+
+	err = virt_pgid(group_leader);
 out:
 	write_unlock_irq(&tasklist_lock);
 	up(&tty_sem);
@@ -1524,7 +1693,7 @@ asmlinkage long sys_newuname(struct new_
 	int errno = 0;
 
 	down_read(&uts_sem);
-	if (copy_to_user(name,&system_utsname,sizeof *name))
+	if (copy_to_user(name,&ve_utsname,sizeof *name))
 		errno = -EFAULT;
 	up_read(&uts_sem);
 	return errno;
@@ -1535,15 +1704,15 @@ asmlinkage long sys_sethostname(char __u
 	int errno;
 	char tmp[__NEW_UTS_LEN];
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	if (len < 0 || len > __NEW_UTS_LEN)
 		return -EINVAL;
 	down_write(&uts_sem);
 	errno = -EFAULT;
 	if (!copy_from_user(tmp, name, len)) {
-		memcpy(system_utsname.nodename, tmp, len);
-		system_utsname.nodename[len] = 0;
+		memcpy(ve_utsname.nodename, tmp, len);
+		ve_utsname.nodename[len] = 0;
 		errno = 0;
 	}
 	up_write(&uts_sem);
@@ -1559,11 +1728,11 @@ asmlinkage long sys_gethostname(char __u
 	if (len < 0)
 		return -EINVAL;
 	down_read(&uts_sem);
-	i = 1 + strlen(system_utsname.nodename);
+	i = 1 + strlen(ve_utsname.nodename);
 	if (i > len)
 		i = len;
 	errno = 0;
-	if (copy_to_user(name, system_utsname.nodename, i))
+	if (copy_to_user(name, ve_utsname.nodename, i))
 		errno = -EFAULT;
 	up_read(&uts_sem);
 	return errno;
@@ -1580,7 +1749,7 @@ asmlinkage long sys_setdomainname(char _
 	int errno;
 	char tmp[__NEW_UTS_LEN];
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	if (len < 0 || len > __NEW_UTS_LEN)
 		return -EINVAL;
@@ -1588,8 +1757,8 @@ asmlinkage long sys_setdomainname(char _
 	down_write(&uts_sem);
 	errno = -EFAULT;
 	if (!copy_from_user(tmp, name, len)) {
-		memcpy(system_utsname.domainname, tmp, len);
-		system_utsname.domainname[len] = 0;
+		memcpy(ve_utsname.domainname, tmp, len);
+		ve_utsname.domainname[len] = 0;
 		errno = 0;
 	}
 	up_write(&uts_sem);
diff -upr linux-2.6.16.46-0.12.orig/kernel/sys_ni.c linux-2.6.16.46-0.12-027test011/kernel/sys_ni.c
--- linux-2.6.16.46-0.12.orig/kernel/sys_ni.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/kernel/sys_ni.c	2007-08-28 17:35:30.000000000 +0400
@@ -116,3 +116,8 @@ cond_syscall(sys32_sysctl);
 cond_syscall(ppc_rtas);
 cond_syscall(sys_spu_run);
 cond_syscall(sys_spu_create);
+
+cond_syscall(sys_getluid);
+cond_syscall(sys_setluid);
+cond_syscall(sys_setublimit);
+cond_syscall(sys_ubstat);
diff -upr linux-2.6.16.46-0.12.orig/kernel/sysctl.c linux-2.6.16.46-0.12-027test011/kernel/sysctl.c
--- linux-2.6.16.46-0.12.orig/kernel/sysctl.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/sysctl.c	2007-08-28 17:35:36.000000000 +0400
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
+#include <linux/ve.h>
 #include <linux/capability.h>
 #include <linux/ctype.h>
 #include <linux/utsname.h>
@@ -46,6 +47,7 @@
 #include <linux/syscalls.h>
 #include <linux/nfs_fs.h>
 #include <linux/acpi.h>
+#include <linux/pid.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -63,6 +65,7 @@ extern int max_threads;
 extern int sysrq_enabled;
 extern int core_uses_pid;
 extern int suid_dumpable;
+extern int sysctl_at_vsyscall;
 extern char core_pattern[];
 extern int cad_pid;
 extern int pid_max;
@@ -73,6 +76,8 @@ extern int pid_max_min, pid_max_max;
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
 
+extern int ve_area_access_check; /* fs/namei.c */
+
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 int unknown_nmi_panic;
 extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
@@ -86,6 +91,8 @@ static int min_percpu_pagelist_fract = 8
 
 static int ngroups_max = NGROUPS_MAX;
 
+int ve_allow_kthreads = 1;
+EXPORT_SYMBOL(ve_allow_kthreads);
 #ifdef CONFIG_KMOD
 extern char modprobe_path[];
 #endif
@@ -101,6 +108,11 @@ extern int msg_ctlmnb;
 extern int msg_ctlmni;
 extern int sem_ctls[];
 #endif
+#ifdef CONFIG_SCHED_VCPU
+extern u32 vcpu_sched_timeslice;
+extern int vcpu_timeslice;
+extern u32 vcpu_hot_timeslice;
+#endif
 
 #ifdef __sparc__
 extern char reboot_command [];
@@ -122,6 +134,7 @@ extern int spin_retry;
 #endif
 
 extern int sysctl_hz_timer;
+int decode_call_traces = 1;
 
 #ifdef CONFIG_BSD_PROCESS_ACCT
 extern int acct_parm[];
@@ -130,11 +143,20 @@ extern int acct_parm[];
 #ifdef CONFIG_IA64
 extern int no_unaligned_warning;
 #endif
+#ifdef CONFIG_VE
+int glob_ve_meminfo = 0;
+EXPORT_SYMBOL(glob_ve_meminfo);
+#endif
+
+#ifdef CONFIG_FAIRSCHED
+extern int fairsched_max_latency;
+extern int scale_vcpu_frequency;
+int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp,
+		        void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
 
 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
 		       ctl_table *, void **);
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
-		  void __user *buffer, size_t *lenp, loff_t *ppos);
 static void __insert_sysctl_table(struct ctl_table_header *, int);
 
 static ctl_table root_table[];
@@ -181,6 +203,8 @@ static void register_proc_table(ctl_tabl
 static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
 #endif
 
+extern struct new_utsname virt_utsname;
+
 static unsigned int __HZ = HZ;
 /* The default sysctl tables: */
 
@@ -282,6 +306,15 @@ static ctl_table kern_table[] = {
 		.strategy	= &sysctl_string,
 	},
 	{
+		.ctl_name	= KERN_VIRT_OSRELEASE,
+		.procname	= "virt_osrelease",
+		.data		= virt_utsname.release,
+		.maxlen		= sizeof(virt_utsname.release),
+		.mode		= 0644,
+		.proc_handler	= &proc_doutsstring,
+		.strategy	= &sysctl_string,
+	},
+	{
 		.ctl_name	= KERN_PANIC,
 		.procname	= "panic",
 		.data		= &panic_timeout,
@@ -327,10 +360,11 @@ static ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_CAP_BSET,
 		.procname	= "cap-bound",
-		.data		= &cap_bset,
+		.data		= NULL,
 		.maxlen		= sizeof(kernel_cap_t),
 		.mode		= 0600,
 		.proc_handler	= &proc_dointvec_bset,
+		.strategy	= &sysctl_strategy_bset,
 	},
 #ifdef CONFIG_BLK_DEV_INITRD
 	{
@@ -595,6 +629,32 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_SCHED_VCPU
+	{
+		.ctl_name	= KERN_VCPU_SCHED_TIMESLICE,
+		.procname	= "vcpu_sched_timeslice",
+		.data		= &vcpu_sched_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_VCPU_TIMESLICE,
+		.procname	= "vcpu_timeslice",
+		.data		= &vcpu_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_VCPU_HOT_TIMESLICE,
+		.procname	= "vcpu_hot_timeslice",
+		.data		= &vcpu_hot_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{
 		.ctl_name	= KERN_PIDMAX,
 		.procname	= "pid_max",
@@ -606,6 +666,24 @@ static ctl_table kern_table[] = {
 		.extra1		= &pid_max_min,
 		.extra2		= &pid_max_max,
 	},
+#ifdef CONFIG_VE
+	{
+		.ctl_name	= KERN_VIRT_PIDS,
+		.procname	= "virt_pids",
+		.data		= &glob_virt_pids,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_VE_MEMINFO,
+		.procname	= "ve_meminfo",
+		.data		= &glob_ve_meminfo,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{
 		.ctl_name	= KERN_PANIC_ON_OOPS,
 		.procname	= "panic_on_oops",
@@ -977,6 +1055,24 @@ static ctl_table vm_table[] = {
 		.extra2		= &one_hundred,
 	},
 #endif
+#ifdef CONFIG_FAIRSCHED
+	{
+		.ctl_name	= KERN_FAIRSCHED_MAX_LATENCY,
+		.procname	= "fairsched-max-latency",
+		.data		=  &fairsched_max_latency,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &fsch_sysctl_latency
+	},
+	{
+		.ctl_name	= KERN_SCALE_VCPU_FREQUENCY,
+		.procname	= "scale_vcpu_frequency",
+		.data		= &scale_vcpu_frequency,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 
@@ -1103,6 +1199,22 @@ static ctl_table fs_table[] = {
 };
 
 static ctl_table debug_table[] = {
+	{
+		.ctl_name	= DBG_DECODE_CALLTRACES,
+		.procname	= "decode_call_traces",
+		.data		= &decode_call_traces,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= FS_AT_VSYSCALL,
+		.procname	= "vsyscall",
+		.data		= &sysctl_at_vsyscall,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 	{ .ctl_name = 0 }
 };
 
@@ -1166,6 +1278,7 @@ int do_sysctl(int __user *name, int nlen
 {
 	struct list_head *tmp;
 	int error = -ENOTDIR;
+	struct ve_struct *ve;
 
 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
 		return -ENOTDIR;
@@ -1174,13 +1287,24 @@ int do_sysctl(int __user *name, int nlen
 		if (!oldlenp || get_user(old_len, oldlenp))
 			return -EFAULT;
 	}
+	ve = get_exec_env();
 	spin_lock(&sysctl_lock);
+#ifdef CONFIG_VE
+	tmp = ve->sysctl_lh.next;
+#else
 	tmp = &root_table_header.ctl_entry;
+#endif
 	do {
-		struct ctl_table_header *head =
-			list_entry(tmp, struct ctl_table_header, ctl_entry);
+		struct ctl_table_header *head;
 		void *context = NULL;
 
+#ifdef CONFIG_VE
+		if (tmp == &ve->sysctl_lh)
+			/* second pass over global variables */
+			tmp = &root_table_header.ctl_entry;
+#endif
+
+		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
 		if (!use_table(head))
 			continue;
 
@@ -1234,10 +1358,14 @@ static int test_perm(int mode, int op)
 static inline int ctl_perm(ctl_table *table, int op)
 {
 	int error;
+	int mode = table->mode;
+
 	error = security_sysctl(table, op);
 	if (error)
 		return error;
-	return test_perm(table->mode, op);
+	if (!ve_accessible(table->owner_env, get_exec_env()))
+		mode &= ~0222; /* disable write access */
+	return test_perm(mode, op);
 }
 
 static int parse_table(int __user *name, int nlen,
@@ -1279,6 +1407,36 @@ repeat:
 	return -ENOTDIR;
 }
 
+/* Default old-value/new-value copy for a sysctl entry stored at @data. */
+int __do_sysctl_strategy(void *data, ctl_table *table,
+			int __user *name, int nlen,
+			void __user *oldval, size_t __user *oldlenp,
+			void __user *newval, size_t newlen, void **context)
+{
+	size_t count;
+
+	/* Read side: hand back at most table->maxlen bytes and that length. */
+	if (oldval && oldlenp) {
+		if (get_user(count, oldlenp))
+			return -EFAULT;
+		if (count) {
+			if (count > table->maxlen)
+				count = table->maxlen;
+			if (copy_to_user(oldval, data, count) ||
+			    put_user(count, oldlenp))
+				return -EFAULT;
+		}
+	}
+
+	/* Write side: a longer new value is cut down to table->maxlen. */
+	if (newval && newlen) {
+		count = newlen > table->maxlen ? table->maxlen : newlen;
+		if (copy_from_user(data, newval, count))
+			return -EFAULT;
+	}
+	return 0;
+}
+
 /* Perform the actual read/write of a sysctl table entry. */
 int do_sysctl_strategy (ctl_table *table, 
 			int __user *name, int nlen,
@@ -1286,7 +1444,6 @@ int do_sysctl_strategy (ctl_table *table
 			void __user *newval, size_t newlen, void **context)
 {
 	int op = 0, rc;
-	size_t len;
 
 	if (oldval)
 		op |= 004;
@@ -1306,27 +1463,10 @@ int do_sysctl_strategy (ctl_table *table
 
 	/* If there is no strategy routine, or if the strategy returns
 	 * zero, proceed with automatic r/w */
-	if (table->data && table->maxlen) {
-		if (oldval && oldlenp) {
-			if (get_user(len, oldlenp))
-				return -EFAULT;
-			if (len) {
-				if (len > table->maxlen)
-					len = table->maxlen;
-				if(copy_to_user(oldval, table->data, len))
-					return -EFAULT;
-				if(put_user(len, oldlenp))
-					return -EFAULT;
-			}
-		}
-		if (newval && newlen) {
-			len = newlen;
-			if (len > table->maxlen)
-				len = table->maxlen;
-			if(copy_from_user(table->data, newval, len))
-				return -EFAULT;
-		}
-	}
+	if (table->data && table->maxlen)
+		return __do_sysctl_strategy (table->data, table, name, nlen,
+				oldval, oldlenp, newval, newlen, context);
+
 	return 0;
 }
 
@@ -1403,6 +1543,7 @@ struct ctl_table_header *register_sysctl
 					       int insert_at_head)
 {
 	struct ctl_table_header *tmp;
+
 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
 	if (!tmp)
 		return NULL;
@@ -1475,18 +1616,76 @@ struct ctl_table_header *register_sysctl
 void __insert_sysctl_table(struct ctl_table_header * tmp,
 			int insert_at_head)
 {
+	struct list_head *lh;	/* list that tmp gets linked onto */
+
 	INIT_LIST_HEAD(&tmp->ctl_entry);
 	tmp->used = 0;
 	tmp->unregistering = NULL;
 	spin_lock(&sysctl_lock);
+#ifdef CONFIG_VE
+	lh = &get_exec_env()->sysctl_lh;
+#else
+	lh = &root_table_header.ctl_entry;
+#endif
 	if (insert_at_head)
-		list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
+		list_add(&tmp->ctl_entry, lh);
 	else
-		list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
+		list_add_tail(&tmp->ctl_entry, lh);
 	spin_unlock(&sysctl_lock);
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_VE
+	register_proc_table(tmp->ctl_table, get_exec_env()->proc_sys_root, tmp);
+#else
 	register_proc_table(tmp->ctl_table, proc_sys_root, tmp);
 #endif
+#endif
 }
+
+/* Free a cloned table: child tables first, then the table itself. */
+void free_sysctl_clone(ctl_table *clone)
+{
+	ctl_table *ent;
+
+	for (ent = clone; ent->ctl_name != 0; ent++)
+		if (ent->child != NULL)
+			free_sysctl_clone(ent->child);
+	kfree(clone);
+}
+
+/* Deep-copy a ctl_table tree; each entry's owner_env is the current env. */
+ctl_table *clone_sysctl_template(ctl_table *tmpl)
+{
+	ctl_table *clone;
+	int i, nr;
+
+	/* Count the entries, including the zero-ctl_name terminator. */
+	for (nr = 0; tmpl[nr].ctl_name != 0; nr++)
+		;
+	nr++;
+
+	clone = kmalloc(nr * sizeof(ctl_table), GFP_KERNEL);
+	if (clone == NULL)
+		return NULL;
+
+	memcpy(clone, tmpl, nr * sizeof(ctl_table));
+	for (i = 0; i < nr; i++) {
+		clone[i].owner_env = get_exec_env();
+		clone[i].de = NULL;
+		if (tmpl[i].child != NULL) {
+			clone[i].child = clone_sysctl_template(tmpl[i].child);
+			if (clone[i].child == NULL)
+				goto unroll;
+		}
+	}
+	return clone;
+
+unroll:
+	/* Entries below i own cloned children; those from i up do not. */
+	while (--i >= 0)
+		if (clone[i].child != NULL)
+			free_sysctl_clone(clone[i].child);
+	kfree(clone);
+	return NULL;
+}
 
 /**
@@ -1502,8 +1701,12 @@ void unregister_sysctl_table(struct ctl_
 	spin_lock(&sysctl_lock);
 	start_unregistering(header);
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_VE
+	unregister_proc_table(header->ctl_table, get_exec_env()->proc_sys_root);
+#else
 	unregister_proc_table(header->ctl_table, proc_sys_root);
 #endif
+#endif
 	spin_unlock(&sysctl_lock);
 	kfree(header);
 }
@@ -1589,11 +1792,6 @@ static void unregister_proc_table(ctl_ta
 		 * its fields.  We are under sysctl_lock here.
 		 */
 		de->data = NULL;
-
-		/* Don't unregister proc entries that are still being used.. */
-		if (atomic_read(&de->count))
-			continue;
-
 		table->de = NULL;
 		remove_proc_entry(table->procname, root);
 	}
@@ -1735,7 +1933,7 @@ int proc_dostring(ctl_table *table, int 
  *	to observe. Should this be in kernel/sys.c ????
  */
  
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+int proc_doutsstring(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int r;
@@ -1771,7 +1969,7 @@ static int do_proc_dointvec_conv(int *ne
 	return 0;
 }
 
-static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
+static int __do_proc_dointvec(void *tbl_data, ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos,
 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
 			      int write, void *data),
@@ -1785,13 +1983,13 @@ static int do_proc_dointvec(ctl_table *t
 	char buf[TMPBUFLEN], *p;
 	char __user *s = buffer;
 	
-	if (!table->data || !table->maxlen || !*lenp ||
+	if (!tbl_data || !table->maxlen || !*lenp ||
 	    (*ppos && !write)) {
 		*lenp = 0;
 		return 0;
 	}
 	
-	i = (int *) table->data;
+	i = (int *) tbl_data;
 	vleft = table->maxlen / sizeof(*i);
 	left = *lenp;
 
@@ -1880,6 +2078,16 @@ static int do_proc_dointvec(ctl_table *t
 #undef TMPBUFLEN
 }
 
+static inline int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
+		  void __user *buffer, size_t *lenp, loff_t *ppos,
+		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
+			      int write, void *data),
+		  void *data)
+{
+	return __do_proc_dointvec(table->data, table, write, filp, buffer,
+			lenp, ppos, conv, data);
+}
+
 /**
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
@@ -1947,13 +2155,27 @@ int proc_dointvec_bset(ctl_table *table,
 {
 	int op;
 
-	if (!capable(CAP_SYS_MODULE)) {
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+
+	/* For VE's root writing to VE's cap-bound is prohibited */
+	if ((ve_is_super(ve) && write && !capable(CAP_SYS_MODULE)) ||
+	    (!ve_is_super(ve) && (!capable(CAP_VE_ADMIN) || write))) {
 		return -EPERM;
 	}
 
 	op = (current->pid == 1) ? OP_SET : OP_AND;
-	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
-				do_proc_dointvec_bset_conv,&op);
+	return __do_proc_dointvec(&cap_bset, table, write, filp,
+			buffer, lenp, ppos, do_proc_dointvec_bset_conv, &op);
+}
+
+int sysctl_strategy_bset(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context)
+{
+	return __do_sysctl_strategy(&cap_bset, table, name, nlen,
+			oldval, oldlenp, newval, newlen, context);
+}
 
 struct do_proc_dointvec_minmax_conv_param {
@@ -2310,7 +2532,7 @@ int proc_dostring(ctl_table *table, int 
 	return -ENOSYS;
 }
 
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+int proc_doutsstring(ctl_table *table, int write, struct file *filp,
 			    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
@@ -2620,6 +2842,14 @@ void unregister_sysctl_table(struct ctl_
 {
 }
 
+ctl_table * clone_sysctl_template(ctl_table *tmpl)
+{
+	return NULL;	/* stub; presumably the !CONFIG_SYSCTL build */
+}
+
+void free_sysctl_clone(ctl_table *tmpl)
+{
+}
 #endif /* CONFIG_SYSCTL */
 
 /*
@@ -2632,6 +2862,7 @@ EXPORT_SYMBOL(proc_dointvec_minmax);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
 EXPORT_SYMBOL(proc_dostring);
+EXPORT_SYMBOL(proc_doutsstring);
 EXPORT_SYMBOL(proc_doulongvec_minmax);
 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
 EXPORT_SYMBOL(register_sysctl_table);
@@ -2641,3 +2872,5 @@ EXPORT_SYMBOL(sysctl_jiffies);
 EXPORT_SYMBOL(sysctl_ms_jiffies);
 EXPORT_SYMBOL(sysctl_string);
 EXPORT_SYMBOL(unregister_sysctl_table);
+EXPORT_SYMBOL(clone_sysctl_template);
+EXPORT_SYMBOL(free_sysctl_clone);
diff -upr linux-2.6.16.46-0.12.orig/kernel/taskstats.c linux-2.6.16.46-0.12-027test011/kernel/taskstats.c
--- linux-2.6.16.46-0.12.orig/kernel/taskstats.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/taskstats.c	2007-08-28 17:35:31.000000000 +0400
@@ -182,7 +182,7 @@ static int fill_pid(pid_t pid, struct ta
 
 	if (!pidtsk) {
 		read_lock(&tasklist_lock);
-		tsk = find_task_by_pid(pid);
+		tsk = find_task_by_pid_all(pid);
 		if (!tsk) {
 			read_unlock(&tasklist_lock);
 			return -ESRCH;
@@ -227,7 +227,7 @@ static int fill_tgid(pid_t tgid, struct 
 	first = tgidtsk;
 	if (!first) {
 		read_lock(&tasklist_lock);
-		first = find_task_by_pid(tgid);
+		first = find_task_by_pid_all(tgid);
 		if (!first) {
 			read_unlock(&tasklist_lock);
 			return -ESRCH;
@@ -256,7 +256,7 @@ static int fill_tgid(pid_t tgid, struct 
 		 */
 		delayacct_add_tsk(stats, tsk);
 
-	} while_each_thread(first, tsk);
+	} while_each_thread_all(first, tsk);
 	read_unlock(&tasklist_lock);
 	stats->version = TASKSTATS_VERSION;
 
diff -upr linux-2.6.16.46-0.12.orig/kernel/timer.c linux-2.6.16.46-0.12-027test011/kernel/timer.c
--- linux-2.6.16.46-0.12.orig/kernel/timer.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/timer.c	2007-08-28 17:35:36.000000000 +0400
@@ -36,6 +36,8 @@
 #include <linux/diskdump.h>
 #include <linux/syscalls.h>
 #include <linux/delay.h>
+#include <linux/ve_proto.h>
+#include <linux/virtinfo.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -270,6 +272,8 @@ void add_timer_on(struct timer_list *tim
 	spin_unlock_irqrestore(&base->t_base.lock, flags);
 }
 
+EXPORT_SYMBOL(add_timer_on);
+
 
 /***
  * mod_timer - modify a timer's timeout
@@ -464,7 +468,11 @@ static inline void __run_timers(tvec_bas
 			spin_unlock_irqrestore(&base->t_base.lock, flags);
 			{
 				int preempt_count = preempt_count();
+				struct ve_struct *ve;
+
+				ve = set_exec_env(get_ve0());
 				fn(data);
+				(void)set_exec_env(ve);
 				if (preempt_count != preempt_count()) {
 					printk(KERN_WARNING "huh, entered %p "
 					       "with preempt_count %08x, exited"
@@ -890,6 +898,37 @@ EXPORT_SYMBOL(avenrun);
  * calc_load - given tick count, update the avenrun load estimates.
  * This is called while holding a write_lock on xtime_lock.
  */
+
+
+#ifdef CONFIG_VE
+static void calc_load_ve(void)
+{
+	unsigned long flags, nr_unint, nr_active;
+	struct ve_struct *ve;
+
+	read_lock(&ve_list_lock);	/* held across the whole VE walk */
+	for_each_ve(ve) {
+		nr_active = nr_running_ve(ve) + nr_uninterruptible_ve(ve);
+		nr_active *= FIXED_1;	/* scale to fixed-point */
+		/* update this VE's three averages (EXP_1, EXP_5, EXP_15) */
+		CALC_LOAD(ve->avenrun[0], EXP_1, nr_active);
+		CALC_LOAD(ve->avenrun[1], EXP_5, nr_active);
+		CALC_LOAD(ve->avenrun[2], EXP_15, nr_active);
+	}
+	read_unlock(&ve_list_lock);
+	/* system-wide average of uninterruptible tasks, under kstat_glb_lock */
+	nr_unint = nr_uninterruptible() * FIXED_1;
+	spin_lock_irqsave(&kstat_glb_lock, flags);
+	CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint);
+	CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint);
+	CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint);
+	spin_unlock_irqrestore(&kstat_glb_lock, flags);
+
+}
+#else
+#define calc_load_ve()	do { } while (0)
+#endif
+
 static inline void calc_load(unsigned long ticks)
 {
 	unsigned long active_tasks; /* fixed-point */
@@ -902,6 +941,7 @@ static inline void calc_load(unsigned lo
 		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+		calc_load_ve();
 	}
 }
 
@@ -1012,7 +1052,7 @@ asmlinkage unsigned long sys_alarm(unsig
  */
 asmlinkage long sys_getpid(void)
 {
-	return current->tgid;
+	return virt_tgid(current);
 }
 
 /*
@@ -1026,7 +1066,7 @@ asmlinkage long sys_getppid(void)
 	int pid;
 
 	rcu_read_lock();
-	pid = rcu_dereference(current->real_parent)->tgid;
+	pid = virt_tgid(rcu_dereference(current->real_parent));
 	rcu_read_unlock();
 
 	return pid;
@@ -1165,7 +1205,7 @@ EXPORT_SYMBOL(schedule_timeout_uninterru
 /* Thread ID - the internal kernel "pid" */
 asmlinkage long sys_gettid(void)
 {
-	return current->pid;
+	return virt_pid(current);
 }
 
 /*
@@ -1177,11 +1217,12 @@ asmlinkage long sys_sysinfo(struct sysin
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
 	unsigned long seq;
+	unsigned long *__avenrun;
+	struct timespec tp;
 
 	memset((char *)&val, 0, sizeof(struct sysinfo));
 
 	do {
-		struct timespec tp;
 		seq = read_seqbegin(&xtime_lock);
 
 		/*
@@ -1198,18 +1239,34 @@ asmlinkage long sys_sysinfo(struct sysin
 			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
 			tp.tv_sec++;
 		}
-		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
-
-		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+	} while (read_seqretry(&xtime_lock, seq));
 
+	if (ve_is_super(get_exec_env())) {
+		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+		__avenrun = &avenrun[0];
 		val.procs = nr_threads;
-	} while (read_seqretry(&xtime_lock, seq));
+	}
+#ifdef CONFIG_VE
+	else {
+		struct ve_struct *ve;
+		ve = get_exec_env();
+		__avenrun = &ve->avenrun[0];
+		val.procs = atomic_read(&ve->pcounter);
+		val.uptime = tp.tv_sec - ve->start_timespec.tv_sec;
+	}
+#endif
+	val.loads[0] = __avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+	val.loads[1] = __avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+	val.loads[2] = __avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
 
 	si_meminfo(&val);
 	si_swapinfo(&val);
 
+#ifdef CONFIG_USER_RESOURCE
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_SYSINFO, &val)
+			& NOTIFY_FAIL)
+		return -ENOMSG;
+#endif
 	/*
 	 * If the sum of all the available memory (i.e. ram + swap)
 	 * is less than can be stored in a 32 bit unsigned long then
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/Kconfig linux-2.6.16.46-0.12-027test011/kernel/ub/Kconfig
--- linux-2.6.16.46-0.12.orig/kernel/ub/Kconfig	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/Kconfig	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,104 @@
+#
+# User resources part (UBC)
+#
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+#
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+menu "User resources"
+
+config USER_RESOURCE
+	bool "Enable user resource accounting"
+	default y
+	help
+          This patch provides accounting and allows configuring
+          limits for a user's consumption of exhaustible system resources.
+          The most important resource controlled by this patch is unswappable
+          memory (either mlock'ed or used by internal kernel structures and
+          buffers). The main goal of this patch is to protect processes
+          from running short of important resources because of an accidental
+          misbehavior of processes or malicious activity aiming to ``kill''
+          the system. It is worth mentioning that resource limits configured
+          by setrlimit(2) do not give an acceptable level of protection
+          because they cover only a small fraction of resources and work on a
+          per-process basis.  Per-process accounting doesn't prevent malicious
+          users from spawning a lot of resource-consuming processes.
+
+config USER_RSS_ACCOUNTING
+	bool "Account physical memory usage"
+	default y
+	depends on USER_RESOURCE
+	help
+          This allows estimating per-beancounter physical memory usage.
+          The implemented algorithm accounts shared pages of memory as well,
+          dividing them by the number of beancounters which use the page.
+
+config UBC_IO_ACCT
+	bool "Account disk IO"
+	default y
+	depends on USER_RSS_ACCOUNTING
+	help
+	  When on, this option allows seeing the disk IO activity caused by
+	  tasks from each UB.
+
+config USER_SWAP_ACCOUNTING
+	bool "Account swap usage"
+	default y
+	depends on USER_RESOURCE
+	help
+          This allows accounting of swap usage.
+
+config USER_RESOURCE_PROC
+	bool "Report resource usage in /proc"
+	default y
+	depends on USER_RESOURCE
+	help
+          Allows a system administrator to inspect resource accounts and limits.
+
+config UBC_DEBUG
+	bool "User resources debug features"
+	default n
+	depends on USER_RESOURCE
+	help
+	  Enables setting up debug features for user resource accounting.
+
+config UBC_DEBUG_IO
+	bool "Debug IO accounting"
+	default y
+	depends on UBC_DEBUG && UBC_IO_ACCT
+	help
+	  Debugging for IO accounting.
+
+config UBC_DEBUG_KMEM
+	bool "Debug kmemsize with cache counters"
+	default n
+	depends on UBC_DEBUG
+	help
+	  Adds /proc/user_beancounters_debug entry to get statistics
+	  about cache usage of each beancounter
+
+config UBC_KEEP_UNUSED
+	bool "Keep unused beancounter alive"
+	default y
+	depends on UBC_DEBUG
+	help
+	  If on, unused beancounters are kept on the hash and maxheld value
+	  can be looked through.
+
+config UBC_DEBUG_ITEMS
+	bool "Account resources in items rather than in bytes"
+	default y
+	depends on UBC_DEBUG
+	help
+	  When true some of the resources (e.g. kmemsize) are accounted
+	  in items instead of bytes.
+
+config UBC_UNLIMITED
+	bool "Use unlimited ubc settings"
+	default y
+	depends on UBC_DEBUG
+	help
+	  When ON all limits and barriers are set to max values.
+
+endmenu
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/Makefile linux-2.6.16.46-0.12-027test011/kernel/ub/Makefile
--- linux-2.6.16.46-0.12.orig/kernel/ub/Makefile	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/Makefile	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,15 @@
+#
+# User resources part (UBC)
+#
+# Copyright (C) 2005  SWsoft
+# All rights reserved.
+#
+# Licensing governed by "linux/COPYING.SWsoft" file.
+
+obj-y := ub_sys.o beancounter.o ub_dcache.o ub_mem.o ub_misc.o \
+	 ub_pages.o ub_stat.o ub_oom.o
+
+obj-$(CONFIG_NET) += ub_net.o
+obj-$(CONFIG_USER_RSS_ACCOUNTING) += ub_page_bc.o
+obj-$(CONFIG_USER_RESOURCE_PROC)  += ub_proc.o
+obj-$(CONFIG_UBC_IO_ACCT) += io_acct.o
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/beancounter.c linux-2.6.16.46-0.12-027test011/kernel/ub/beancounter.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/beancounter.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/beancounter.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,668 @@
+/*
+ *  linux/kernel/ub/beancounter.c
+ *
+ *  Copyright (C) 1998  Alan Cox
+ *                1998-2000  Andrey V. Savochkin <saw@saw.sw.com.sg>
+ *  Copyright (C) 2000-2005 SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * TODO:
+ *   - more intelligent limit check in mremap(): currently the new size is
+ *     charged and _then_ old size is uncharged
+ *     (almost done: !move_vma case is completely done,
+ *      move_vma in its current implementation requires too many conditions to
+ *      do things right, because it may be not only expansion, but shrinking
+ *      also, plus do_munmap will require an additional parameter...)
+ *   - problem: bad pmd page handling
+ *   - consider /proc redesign
+ *   - TCP/UDP ports
+ *   + consider whether __charge_beancounter_locked should be inline
+ *
+ * Changes:
+ *   1999/08/17  Marcelo Tosatti <marcelo@conectiva.com.br>
+ *	- Set "barrier" and "limit" parts of limits atomically.
+ *   1999/10/06  Marcelo Tosatti <marcelo@conectiva.com.br>
+ *	- setublimit system call.
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_vmpages.h>
+#include <ub/proc.h>
+
+static kmem_cache_t *ub_cachep;
+static struct user_beancounter default_beancounter;
+struct user_beancounter ub0;
+
+const char *ub_rnames[] = {	/* printable name for each resource index */
+	"kmemsize",	/* 0 */
+	"lockedpages",
+	"privvmpages",
+	"shmpages",
+	"dummy",
+	"numproc",	/* 5 */
+	"physpages",
+	"vmguarpages",
+	"oomguarpages",
+	"numtcpsock",
+	"numflock",	/* 10 */
+	"numpty",
+	"numsiginfo",
+	"tcpsndbuf",
+	"tcprcvbuf",
+	"othersockbuf",	/* 15 */
+	"dgramrcvbuf",
+	"numothersock",
+	"dcachesize",
+	"numfile",
+	"dummy",	/* 20 */
+	"dummy",
+	"dummy",
+	"numiptent",
+	"unused_privvmpages",	/* UB_RESOURCES */
+	"tmpfs_respages",
+	"swap_pages",
+	"held_pages",
+};
+
+static void init_beancounter_struct(struct user_beancounter *ub);
+static void init_beancounter_store(struct user_beancounter *ub);
+static void init_beancounter_nolimits(struct user_beancounter *ub);
+
+int print_ub_uid(struct user_beancounter *ub, char *buf, int size)
+{
+	/* parentless beancounters print as "id", children as "parent.id" */
+	if (!ub->parent)
+		return snprintf(buf, size, "%u", ub->ub_uid);
+	return snprintf(buf, size, "%u.%u", ub->parent->ub_uid, ub->ub_uid);
+}
+EXPORT_SYMBOL(print_ub_uid);
+
+#define ub_hash_fun(x) ((((x) >> 8) ^ (x)) & (UB_HASH_SIZE - 1))	/* evaluates x twice */
+#define ub_subhash_fun(p, id) ub_hash_fun((p)->ub_uid + (id) * 17)
+struct hlist_head ub_hash[UB_HASH_SIZE];	/* buckets for top-level and sub beancounters */
+DEFINE_SPINLOCK(ub_hash_lock);
+LIST_HEAD(ub_list_head); /* protected by ub_hash_lock */
+EXPORT_SYMBOL(ub_hash);
+EXPORT_SYMBOL(ub_hash_lock);
+EXPORT_SYMBOL(ub_list_head);
+
+/*
+ *	Per user resource beancounting. Resources are tied to their luid.
+ *	The resource structure itself is tagged both to the process and
+ *	the charging resources (a socket doesn't want to have to search for
+ *	things at irq time for example). Reference counters keep things in
+ *	hand.
+ *
+ *	The case where a user creates resource, kills all his processes and
+ *	then starts new ones is correctly handled this way. The refcounters
+ *	will mean the old entry is still around with resource tied to it.
+ */
+
+static inline void free_ub(struct user_beancounter *ub)
+{
+	if (ub != NULL) {	/* a NULL beancounter is accepted and ignored */
+		free_percpu(ub->ub_percpu);
+		kmem_cache_free(ub_cachep, ub);
+	}
+}
+
+static inline struct user_beancounter *bc_lookup_hash(struct hlist_head *hash,
+		uid_t uid, struct user_beancounter *parent)
+{
+	struct hlist_node *pos;
+	struct user_beancounter *bc;
+
+	/* match on both id and parent; a hit is returned via get_beancounter() */
+	hlist_for_each_entry (bc, pos, hash, ub_hash)
+		if (bc->parent == parent && bc->ub_uid == uid)
+			return get_beancounter(bc);
+	return NULL;
+}
+
+struct user_beancounter *get_beancounter_byuid(uid_t uid, int create)
+{
+	struct user_beancounter *new_ub, *ub;
+	unsigned long flags;
+	struct hlist_head *hash;
+
+	hash = &ub_hash[ub_hash_fun(uid)];
+	new_ub = NULL;
+retry:	/* re-entered after new_ub has been allocated outside the lock */
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	ub = bc_lookup_hash(hash, uid, NULL);
+	if (ub != NULL) {
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		/* found: a copy we allocated on an earlier pass is now surplus */
+		if (new_ub != NULL)
+			free_ub(new_ub);
+		return ub;	/* reference was taken by bc_lookup_hash() */
+	}
+
+	if (!create) {
+		/* no ub found and the caller did not ask for one to be made */
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return NULL;
+	}
+	/* second pass: the uid is still absent, so publish our new ub */
+	if (new_ub != NULL) {
+		list_add_rcu(&new_ub->ub_list, &ub_list_head);
+		hlist_add_head(&new_ub->ub_hash, hash);
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return new_ub;	/* refcount is 1 from init_beancounter_struct() */
+	}
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	/* alloc new ub with ub_hash_lock dropped (GFP_KERNEL allocation) */
+	new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep,
+			GFP_KERNEL);
+	if (new_ub == NULL)
+		return NULL;
+
+	ub_debug(UBD_ALLOC, "Creating ub %p\n", new_ub);
+	memcpy(new_ub, &default_beancounter, sizeof(*new_ub));	/* default limits */
+	init_beancounter_struct(new_ub);
+	new_ub->ub_percpu = alloc_percpu(struct ub_percpu_struct);
+	if (new_ub->ub_percpu == NULL)
+		goto fail_free;
+	new_ub->ub_uid = uid;
+	goto retry;	/* look up again: the uid may have been added meanwhile */
+
+fail_free:
+	kmem_cache_free(ub_cachep, new_ub);
+	return NULL;
+}
+EXPORT_SYMBOL(get_beancounter_byuid);
+
+struct user_beancounter *get_subbeancounter_byid(struct user_beancounter *p,
+		int id, int create)
+{
+	struct user_beancounter *new_ub, *ub;
+	unsigned long flags;
+	struct hlist_head *hash;
+
+	hash = &ub_hash[ub_subhash_fun(p, id)];
+	new_ub = NULL;
+retry:	/* re-entered after new_ub has been allocated outside the lock */
+	spin_lock_irqsave(&ub_hash_lock, flags);
+	ub = bc_lookup_hash(hash, id, p);
+	if (ub != NULL) {
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		/* found: drop our surplus copy and the parent reference it holds */
+		if (new_ub != NULL) {
+			put_beancounter(new_ub->parent);
+			free_ub(new_ub);
+		}
+		return ub;	/* reference was taken by bc_lookup_hash() */
+	}
+
+	if (!create) {
+		/* no ub found and the caller did not ask for one to be made */
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return NULL;
+	}
+	/* second pass: the id is still absent, so publish our new sub-ub */
+	if (new_ub != NULL) {
+		list_add_rcu(&new_ub->ub_list, &ub_list_head);
+		hlist_add_head(&new_ub->ub_hash, hash);
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return new_ub;	/* refcount is 1 from init_beancounter_struct() */
+	}
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	/* alloc new ub with ub_hash_lock dropped (GFP_KERNEL allocation) */
+	new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep,
+			GFP_KERNEL);
+	if (new_ub == NULL)
+		return NULL;
+
+	ub_debug(UBD_ALLOC, "Creating sub %p\n", new_ub);
+	memset(new_ub, 0, sizeof(*new_ub));
+	init_beancounter_nolimits(new_ub);	/* sub-beancounters start unlimited */
+	init_beancounter_store(new_ub);
+	init_beancounter_struct(new_ub);
+	new_ub->ub_percpu = alloc_percpu(struct ub_percpu_struct);
+	if (new_ub->ub_percpu == NULL)
+		goto fail_free;	/* parent reference not taken yet: nothing to put */
+	new_ub->ub_uid = id;
+	new_ub->parent = get_beancounter(p);	/* the child pins its parent */
+	goto retry;	/* look up again: the id may have been added meanwhile */
+
+fail_free:
+	kmem_cache_free(ub_cachep, new_ub);
+	return NULL;
+}
+EXPORT_SYMBOL(get_subbeancounter_byid);
+
+static void put_warn(struct user_beancounter *ub)
+{
+	char name[64];	/* printable id, filled by print_ub_uid() */
+
+	print_ub_uid(ub, name, sizeof(name));
+	printk(KERN_ERR "UB: Bad refcount (%d) on put of %s (%p)\n",
+			atomic_read(&ub->ub_refcount), name, ub);
+}
+
+#ifdef CONFIG_UBC_KEEP_UNUSED
+#define release_beancounter(ub)	do { } while (0)
+#else
+static int verify_res(struct user_beancounter *ub, int resource,
+		unsigned long held)
+{
+	char name[64];
+
+	if (unlikely(held != 0)) {	/* still charged at put time: report it */
+		print_ub_uid(ub, name, sizeof(name));
+		printk(KERN_WARNING "Ub %s helds %lu in %s on put\n",
+				name, held, ub_rnames[resource]);
+		return 0;
+	}
+	return 1;
+}
+
+static inline void bc_verify_held(struct user_beancounter *ub)
+{
+	int i, clean;
+
+	clean = 1;	/* cleared as soon as any verify_res() call returns 0 */
+	for (i = 0; i < UB_RESOURCES; i++)
+		clean &= verify_res(ub, i, ub->ub_parms[i].held);
+	/* counters kept outside ub_parms[] are checked the same way */
+	clean &= verify_res(ub, UB_UNUSEDPRIVVM, ub->ub_unused_privvmpages);
+	clean &= verify_res(ub, UB_TMPFSPAGES, ub->ub_tmpfs_respages);
+	clean &= verify_res(ub, UB_SWAPPAGES, ub->ub_swap_pages);
+	clean &= verify_res(ub, UB_HELDPAGES, (unsigned long)ub->ub_held_pages);
+
+	ub_debug_trace(!clean, 5, 60*HZ);
+}
+
+static void bc_free_rcu(struct rcu_head *rcu)
+{
+	/*
+	 * RCU callback: recover the beancounter embedding @rcu and free it.
+	 */
+	free_ub(container_of(rcu, struct user_beancounter, rcu));
+}
+
+static void delayed_release_beancounter(void *data)
+{
+	struct user_beancounter *ub, *parent;
+	unsigned long flags;
+
+	ub = (struct user_beancounter *)data;
+again:	/* drop one reference; ub_hash_lock is taken only if it was the last */
+	local_irq_save(flags);
+	if (!atomic_dec_and_lock(&ub->ub_refcount, &ub_hash_lock)) {
+		/* raced with get_beancounter_byuid */
+		local_irq_restore(flags);
+		return;
+	}
+	/* last reference gone: unlink from the hash and from the global list */
+	hlist_del(&ub->ub_hash);
+	list_del_rcu(&ub->ub_list);
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+
+	bc_verify_held(ub);
+	ub_free_counters(ub);
+	parent = ub->parent;	/* read before ub is handed over to RCU */
+	/* free via bc_free_rcu(), then drop the reference held on the parent */
+	call_rcu(&ub->rcu, bc_free_rcu);
+	if (parent) {
+		ub = parent;
+		goto again;
+	}
+}
+
+static inline void release_beancounter(struct user_beancounter *ub)
+{	/* the actual teardown is done by delayed_release_beancounter() */
+	execute_in_process_context(delayed_release_beancounter, ub,
+			&ub->cleanup);
+}
+#endif
+
+void __put_beancounter(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	/* equivalent to atomic_dec_and_lock_irqsave() */
+	local_irq_save(flags);
+	if (likely(!atomic_dec_and_lock(&ub->ub_refcount, &ub_hash_lock))) {
+		if (unlikely(atomic_read(&ub->ub_refcount) < 0))
+			put_warn(ub);
+		local_irq_restore(flags);
+		return;
+	}
+	/* the refcount reached zero and ub_hash_lock is held from here on */
+	if (unlikely(ub == get_ub0())) {
+		printk(KERN_ERR "Trying to put ub0\n");
+		spin_unlock_irqrestore(&ub_hash_lock, flags);
+		return;
+	}
+
+	/* prevent get_beancounter_byuid + put_beancounter() reentrance */
+	atomic_inc(&ub->ub_refcount);
+	spin_unlock_irqrestore(&ub_hash_lock, flags);
+	/* expands to nothing when CONFIG_UBC_KEEP_UNUSED is set */
+	release_beancounter(ub);
+}
+EXPORT_SYMBOL(__put_beancounter);
+
+void put_beancounter_safe(struct user_beancounter *ub)
+{
+	synchronize_rcu();	/* wait for an RCU grace period before the put */
+	__put_beancounter(ub);
+}
+EXPORT_SYMBOL(put_beancounter_safe);
+
+/*
+ *	Generic resource charging stuff
+ */
+
+int __charge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val, enum ub_severity strict)
+{
+	ub_debug_resource(resource, "Charging %lu for %d of %p with %lu\n",
+			val, resource, ub, ub->ub_parms[resource].held);
+	/*
+	 * ub_value <= UB_MAXVALUE, value <= UB_MAXVALUE, and only one addition
+	 * at the moment is possible so an overflow is impossible.
+	 */
+	ub->ub_parms[resource].held += val;
+	/* charge first, then check; a refused charge is rolled back below */
+	switch (strict) {
+		case UB_HARD:
+			if (ub->ub_parms[resource].held >
+					ub->ub_parms[resource].barrier)
+				break;
+		case UB_SOFT:	/* UB_HARD falls through: the limit applies too */
+			if (ub->ub_parms[resource].held >
+					ub->ub_parms[resource].limit)
+				break;
+		case UB_FORCE:	/* checks passed (or none requested): keep charge */
+			ub_adjust_maxheld(ub, resource);
+			return 0;
+		default:
+			BUG();
+	}
+	/* refused: ub_uid is printed unsigned, as print_ub_uid() does */
+	if (strict == UB_SOFT && ub_ratelimit(&ub->ub_limit_rl))
+		printk(KERN_INFO "Fatal resource shortage: %s, UB %u.\n",
+		       ub_rnames[resource], ub->ub_uid);
+	ub->ub_parms[resource].failcnt++;
+	ub->ub_parms[resource].held -= val;
+	return -ENOMEM;
+}
+
+int charge_beancounter(struct user_beancounter *ub,
+		int resource, unsigned long val, enum ub_severity strict)
+{
+	int retval;
+	struct user_beancounter *p, *q;
+	unsigned long flags;
+
+	retval = -EINVAL;
+	if (val > UB_MAXVALUE)
+		goto out;
+	/* charge ub and then every ancestor, each under its own ub_lock */
+	local_irq_save(flags);
+	for (p = ub; p != NULL; p = p->parent) {
+		spin_lock(&p->ub_lock);
+		retval = __charge_beancounter_locked(p, resource, val, strict);
+		spin_unlock(&p->ub_lock);
+		if (retval)
+			goto unroll;
+	}
+out_restore:
+	local_irq_restore(flags);
+out:
+	return retval;
+	/* p refused: undo the charge on the levels from ub up to, not incl., p */
+unroll:
+	for (q = ub; q != p; q = q->parent) {
+		spin_lock(&q->ub_lock);
+		__uncharge_beancounter_locked(q, resource, val);
+		spin_unlock(&q->ub_lock);
+	}
+	goto out_restore;
+}
+
+EXPORT_SYMBOL(charge_beancounter);
+
+void __charge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	unsigned long irqflags;
+	struct user_beancounter *bc;
+
+	local_irq_save(irqflags);
+	for (bc = ub; bc->parent; bc = bc->parent) {	/* skips the top level */
+		spin_lock(&bc->ub_lock);
+		__charge_beancounter_locked(bc, resource, val, UB_FORCE);
+		spin_unlock(&bc->ub_lock);
+	}
+	local_irq_restore(irqflags);
+}
+
+EXPORT_SYMBOL(__charge_beancounter_notop);
+
+void uncharge_warn(struct user_beancounter *ub, int resource,
+		unsigned long val, unsigned long held)
+{
+	char id[64];	/* printable id, filled by print_ub_uid() */
+
+	print_ub_uid(ub, id, sizeof(id));
+	printk(KERN_ERR "Uncharging too much %lu h %lu, res %s ub %s\n",
+			val, held, ub_rnames[resource], id);
+	ub_debug_trace(1, 10, 10*HZ);
+}
+
+void __uncharge_beancounter_locked(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	ub_debug_resource(resource, "Uncharging %lu for %d of %p with %lu\n",
+			val, resource, ub, ub->ub_parms[resource].held);
+	/* never let held underflow: warn and clamp to what is actually held */
+	if (val > ub->ub_parms[resource].held) {
+		uncharge_warn(ub, resource, val, ub->ub_parms[resource].held);
+		val = ub->ub_parms[resource].held;
+	}
+	ub->ub_parms[resource].held -= val;
+}
+
+void uncharge_beancounter(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	unsigned long irqflags;
+
+	while (ub != NULL) {	/* ub itself, then each ancestor in turn */
+		spin_lock_irqsave(&ub->ub_lock, irqflags);
+		__uncharge_beancounter_locked(ub, resource, val);
+		spin_unlock_irqrestore(&ub->ub_lock, irqflags);
+		ub = ub->parent;
+	}
+}
+
+EXPORT_SYMBOL(uncharge_beancounter);
+
+void __uncharge_beancounter_notop(struct user_beancounter *ub,
+		int resource, unsigned long val)
+{
+	unsigned long irqflags;
+	struct user_beancounter *bc;
+
+	local_irq_save(irqflags);
+	for (bc = ub; bc->parent; bc = bc->parent) {	/* skips the top level */
+		spin_lock(&bc->ub_lock);
+		__uncharge_beancounter_locked(bc, resource, val);
+		spin_unlock(&bc->ub_lock);
+	}
+	local_irq_restore(irqflags);
+}
+
+EXPORT_SYMBOL(__uncharge_beancounter_notop);
+
+
+/*
+ *	Rate limiting stuff.
+ */
+int ub_ratelimit(struct ub_rate_info *p)
+{
+	unsigned long cjif, djif;
+	unsigned long flags;
+	static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;	/* shared by all callers */
+	long new_bucket;
+
+	spin_lock_irqsave(&ratelimit_lock, flags);
+	cjif = jiffies;
+	djif = cjif - p->last;	/* jiffies since the last permitted event */
+	if (djif < p->interval) {
+		if (p->bucket >= p->burst) {
+			spin_unlock_irqrestore(&ratelimit_lock, flags);
+			return 0;	/* burst used up within the interval: suppress */
+		}
+		p->bucket++;
+	} else {	/* drain one slot per elapsed interval, then count this event */
+		new_bucket = p->bucket - (djif / (unsigned)p->interval);
+		if (new_bucket < 0)
+			new_bucket = 0;
+		p->bucket = new_bucket + 1;
+	}
+	p->last = cjif;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return 1;	/* the event is permitted */
+}
+EXPORT_SYMBOL(ub_ratelimit);
+
+
+/*
+ *	Initialization
+ *
+ *	struct user_beancounter contains
+ *	 - limits and other configuration settings,
+ *	   with a copy stored for accounting purposes,
+ *	 - structural fields: lists, spinlocks and so on.
+ *
+ *	Before these parts are initialized, the structure should be memset
+ *	to 0 or copied from a known clean structure.  That takes care of a lot
+ *	of fields not initialized explicitly.
+ */
+
+static void init_beancounter_struct(struct user_beancounter *ub)
+{
+	ub->ub_magic = UB_MAGIC;
+	atomic_set(&ub->ub_refcount, 1);	/* starts with a single reference */
+	spin_lock_init(&ub->ub_lock);
+	INIT_LIST_HEAD(&ub->ub_tcp_sk_list);
+	INIT_LIST_HEAD(&ub->ub_other_sk_list);
+#ifdef CONFIG_UBC_DEBUG_KMEM
+	INIT_LIST_HEAD(&ub->ub_cclist);
+#endif
+}
+
+static void init_beancounter_store(struct user_beancounter *ub)
+{
+	int res;
+
+	/* snapshot each ub_parms[] entry into the matching ub_store[] slot */
+	for (res = 0; res < UB_RESOURCES; res++)
+		memcpy(&ub->ub_store[res], &ub->ub_parms[res],
+				sizeof(struct ubparm));
+}
+
+static void init_beancounter_nolimits(struct user_beancounter *ub)
+{
+	int res;
+
+	/* FIXME: set unlimited rate? */
+	ub->ub_limit_rl.interval = 300*HZ;
+	ub->ub_limit_rl.burst = 4;
+
+	for (res = 0; res < UB_RESOURCES; res++) {
+		/* FIXME: whether this is right for physpages and guarantees? */
+		ub->ub_parms[res].barrier = UB_MAXVALUE;
+		ub->ub_parms[res].limit = UB_MAXVALUE;
+	}
+}
+
+static void init_beancounter_syslimits(struct user_beancounter *ub)
+{
+	extern int max_threads;
+	int k;
+	unsigned long mp = num_physpages;
+
+	ub->ub_parms[UB_KMEMSIZE].limit =	/* 32MB above 192MB of RAM, else RAM/6 */
+		mp > (192*1024*1024 >> PAGE_SHIFT) ?
+				32*1024*1024 : (mp << PAGE_SHIFT) / 6;
+	ub->ub_parms[UB_LOCKEDPAGES].limit = 8;
+	ub->ub_parms[UB_PRIVVMPAGES].limit = UB_MAXVALUE;
+	ub->ub_parms[UB_SHMPAGES].limit = 64;
+	ub->ub_parms[UB_NUMPROC].limit = max_threads / 2;
+	ub->ub_parms[UB_NUMTCPSOCK].limit = 1024;
+	ub->ub_parms[UB_TCPSNDBUF].limit = 1024*4*1024; /* 4k per socket */
+	ub->ub_parms[UB_TCPRCVBUF].limit = 1024*6*1024; /* 6k per socket */
+	ub->ub_parms[UB_NUMOTHERSOCK].limit = 256;
+	ub->ub_parms[UB_DGRAMRCVBUF].limit = 256*4*1024; /* 4k per socket */
+	ub->ub_parms[UB_OTHERSOCKBUF].limit = 256*8*1024; /* 8k per socket */
+	ub->ub_parms[UB_NUMFLOCK].limit = 1024;
+	ub->ub_parms[UB_NUMPTY].limit = 16;
+	ub->ub_parms[UB_NUMSIGINFO].limit = 1024;
+	ub->ub_parms[UB_DCACHESIZE].limit = 1024*1024;
+	ub->ub_parms[UB_NUMFILE].limit = 1024;
+	/* barriers equal the limits; resources not set above keep their value */
+	for (k = 0; k < UB_RESOURCES; k++)
+		ub->ub_parms[k].barrier = ub->ub_parms[k].limit;
+
+	ub->ub_limit_rl.burst = 4;
+	ub->ub_limit_rl.interval = 300*HZ;
+}
+
+#ifdef CONFIG_SMP
+static struct percpu_data ub0_percpu;
+#endif
+static struct ub_percpu_struct ub0_percpu_data[NR_CPUS];	/* ub0's statically allocated per-cpu data */
+
+void __init ub_init_ub0(void)
+{
+	struct user_beancounter *ub;
+
+	init_cache_counters();
+	ub = get_ub0();
+	memset(ub, 0, sizeof(*ub));
+	ub->ub_uid = 0;
+	init_beancounter_nolimits(ub);
+	init_beancounter_store(ub);
+	init_beancounter_struct(ub);
+	ub->ub_percpu = static_percpu_ptr(&ub0_percpu, ub0_percpu_data);
+	/* attach the current task and init_mm to ub0, one reference each */
+	memset(&current->task_bc, 0, sizeof(struct task_beancounter));
+	(void)set_exec_ub(ub);
+	current->task_bc.task_ub = get_beancounter(ub);
+	__charge_beancounter_locked(ub, UB_NUMPROC, 1, UB_FORCE);
+	current->task_bc.fork_sub = get_beancounter(ub);
+	ub_init_task_bc(&current->task_bc);
+	init_mm.mm_ub = get_beancounter(ub);
+	/* hash with ub_hash_fun() so get_beancounter_byuid() finds this bucket */
+	hlist_add_head(&ub->ub_hash, &ub_hash[ub_hash_fun(ub->ub_uid)]);
+	list_add(&ub->ub_list, &ub_list_head);
+}
+
+void __init ub_init_late(void)
+{
+	ub_cachep = kmem_cache_create("user_beancounters",
+			sizeof(struct user_beancounter),
+			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+	/* default_beancounter is the template get_beancounter_byuid() copies */
+	memset(&default_beancounter, 0, sizeof(default_beancounter));
+#ifdef CONFIG_UBC_UNLIMITED
+	init_beancounter_nolimits(&default_beancounter);
+#else
+	init_beancounter_syslimits(&default_beancounter);
+#endif
+	init_beancounter_store(&default_beancounter);
+	init_beancounter_struct(&default_beancounter);
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/io_acct.c linux-2.6.16.46-0.12-027test011/kernel/ub/io_acct.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/io_acct.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/io_acct.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,522 @@
+/*
+ *  kernel/ub/io_acct.c
+ *
+ *  Copyright (C) 2006  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ *  Pavel Emelianov <xemul@openvz.org>
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/mempool.h>
+#include <linux/proc_fs.h>
+#include <linux/virtinfo.h>
+#include <linux/pagemap.h>
+
+#include <ub/beancounter.h>
+#include <ub/io_acct.h>
+#include <ub/ub_page.h>
+#include <ub/ub_vmpages.h>
+#include <ub/proc.h>
+
+static struct mempool_s *pb_pool;
+
+#define PB_MIN_IO	(1024)
+
+static inline struct page_beancounter *io_pb_alloc(void)
+{
+	return mempool_alloc(pb_pool, GFP_ATOMIC);
+}
+
+static inline void io_pb_free(struct page_beancounter *pb)
+{
+	mempool_free(pb, pb_pool);
+}
+
+#define PAGE_IO_MARK	(0x1UL)
+
+static inline struct page_beancounter *iopb_to_pb(struct page_beancounter *pb)
+{
+	if (!((unsigned long)pb & PAGE_IO_MARK))
+		return NULL;
+
+	return (struct page_beancounter *)((unsigned long)pb & ~PAGE_IO_MARK);
+}
+
+struct page_beancounter **page_pblist(struct page *page)
+{
+	struct page_beancounter **pb, *iopb;
+
+	pb = &page_pbc(page);
+	iopb = iopb_to_pb(*pb);
+
+	return iopb == NULL ? pb : &iopb->page_pb_list;
+}
+
+/*
+ * We save the context in which the page was set dirty to use it
+ * later when the real write starts. If the page is mapped then
+ * the IO pb is stored like this:
+ *
+ * Before saving:
+ *
+ *  +- page -------+
+ *  | ...          |
+ *  | page_pb      +---+
+ *  +--------------+   |   +-----+    +-----+          +-----+
+ *                     +-> | pb1 | -> | pb2 | - ... -> | pbN | -+
+ *                         +-----+    +-----+          +-----+  |
+ *                            ^                                 |
+ *                            +---------------------------------+
+ *
+ * After saving:
+ *
+ *  +- page -------+      +- io pb ------+
+ *  | ...          |      | ...          |
+ *  | page_pb      +----> | page_pb_list +-+
+ *  +--------------+      +--------------+ |
+ *                                         |
+ *                     +-------------------+
+ *                     |
+ *                     |   +-----+    +-----+          +-----+
+ *                     +-> | pb1 | -> | pb2 | - ... -> | pbN | -+
+ *                         +-----+    +-----+          +-----+  |
+ *                            ^                                 |
+ *                            +---------------------------------+
+ *
+ * And the page_pblist(...) function returns pointer to the place that
+ * points to this pbX ring.
+ */
+
+#ifdef CONFIG_UBC_DEBUG_IO
+static LIST_HEAD(pb_io_list);
+static unsigned long anon_pages, not_released;
+
+static inline void io_debug_save(struct page_beancounter *pb,
+		struct page_beancounter *mpb)
+{
+	pb->io_debug = (mpb == NULL);
+	list_add(&pb->io_list, &pb_io_list);
+}
+
+static inline void io_debug_release(struct page_beancounter *pb)
+{
+	list_del(&pb->io_list);
+}
+
+void ub_io_release_debug(struct page *page)
+{
+	struct page_beancounter *pb;
+	static int once = 0;
+
+	pb = page_pbc(page);
+	if (likely(iopb_to_pb(pb) == NULL))
+		return;
+
+	if (!once) {
+		printk("BUG: Page has an IO bc but is not expectd to\n");
+		dump_stack();
+		once = 1;
+	}
+
+	spin_lock(&pb_lock);
+	not_released++;
+	pb = iopb_to_pb(pb);
+	page_pbc(page) = NULL;
+	io_debug_release(pb);
+	pb->ub->io_pb_held--;
+	spin_unlock(&pb_lock);
+
+	put_beancounter(pb->ub);
+	io_pb_free(pb);
+}
+
+static inline int io_debug_precheck_save(struct page *page)
+{
+	if (unlikely(PageAnon(page))) {
+		anon_pages++;
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline int io_debug_precheck_release(struct page *page)
+{
+	return 0;
+}
+#else
+#define io_debug_save(pb, mpb)	do { } while (0)
+#define io_debug_release(pb)	do { } while (0)
+#define io_debug_precheck_save(page)		(0)
+#define io_debug_precheck_release(p)		(0)
+#endif
+
+static inline void set_page_io(struct page *page, struct page_beancounter *pb,
+		struct page_beancounter *mapped_pb)
+{
+	unsigned long val;
+
+	val = (unsigned long)pb | PAGE_IO_MARK;
+	pb->page = page;
+
+	page_pbc(page) = (struct page_beancounter *)val;
+	io_debug_save(pb, mapped_pb);
+	pb->ub->io_pb_held++;
+}
+
+static inline void put_page_io(struct page *page, struct page_beancounter *pb)
+{
+	pb->ub->io_pb_held--;
+	io_debug_release(pb);
+	page_pbc(page) = pb->page_pb_list;
+}
+
+/*
+ * Record the beancounter that dirtied @page in an IO pb attached to the page
+ * (see set_page_io()); @bytes_dirtied is added to that beancounter's stats.
+ */
+void ub_io_save_context(struct page *page, size_t bytes_dirtied)
+{
+	struct user_beancounter *ub;
+	struct page_beancounter *pb, *mapped_pb, *io_pb;
+
+	if (unlikely(in_interrupt())) {
+		WARN_ON(1);
+		return;
+	}
+
+	/*
+	 * FIXME - this can happen from atomic context and
+	 * it's probably not that good to lose some requests
+	 */
+	pb = io_pb_alloc();
+	io_pb = NULL;
+
+	spin_lock(&pb_lock);
+	if (io_debug_precheck_save(page))
+		goto out_free;	/* pb is not going to be used - don't leak it */
+
+	mapped_pb = page_pbc(page);
+	io_pb = iopb_to_pb(mapped_pb);
+	if (io_pb != NULL) {
+		/*
+		 * this page has an IO - release it and force a new one
+		 * We could also race with page cleaning - see below
+		 */
+		mapped_pb = io_pb->page_pb_list;
+		put_page_io(page, io_pb);
+	}
+
+	/*
+	 * If the page is mapped we must save the context it maps to. If the
+	 * page isn't mapped we use current context as this is a regular write.
+	 */
+	if (mapped_pb != NULL)
+		ub = top_beancounter(mapped_pb->ub);
+	else
+		ub = get_io_ub();
+
+	if (!PageDirty(page)) {
+		/*
+		 * race with clear_page_dirty(_for_io) - account
+		 * writes for ub_io_release_context()
+		 */
+		if (io_pb != NULL)
+			io_pb->ub->bytes_wrote += PAGE_CACHE_SIZE;
+		goto out_free;
+	}
+
+	if (pb == NULL) {
+		ub->bytes_dirty_missed += bytes_dirtied;
+		goto out_unlock;
+	}
+
+	/*
+	 * the page may become clean here, but the context will be seen
+	 * in ub_io_release_context()
+	 */
+	pb->ub = get_beancounter(ub);
+	pb->page_pb_list = mapped_pb;
+	ub->bytes_dirtied += bytes_dirtied;
+	set_page_io(page, pb, mapped_pb);
+	goto out_unlock;
+
+out_free:
+	if (pb != NULL)
+		io_pb_free(pb);
+out_unlock:
+	spin_unlock(&pb_lock);
+	if (io_pb != NULL) {
+		put_beancounter(io_pb->ub);
+		io_pb_free(io_pb);
+	}
+}
+
+void ub_io_release_context(struct page *page, size_t wrote)
+{
+	struct page_beancounter *pb;
+
+	if (io_debug_precheck_release(page))
+		return;
+
+	if (unlikely(in_interrupt())) {
+		WARN_ON(1);
+		return;
+	}
+
+	spin_lock(&pb_lock);
+	pb = iopb_to_pb(page_pbc(page));
+	if (unlikely(pb == NULL))
+		/*
+		 * this may happen if we failed to allocate
+		 * context in ub_io_save_context or raced with it
+		 */
+		goto out_unlock;
+
+	if (wrote)
+		pb->ub->bytes_wrote += wrote;
+
+	put_page_io(page, pb);
+out_unlock:
+	spin_unlock(&pb_lock);
+
+	if (pb != NULL) {
+		put_beancounter(pb->ub);
+		io_pb_free(pb);
+	}
+}
+
+void __init ub_init_io(struct kmem_cache *pb_cachep)
+{
+	pb_pool = mempool_create_slab_pool(PB_MIN_IO, pb_cachep);
+	if (pb_pool == NULL)
+		panic("Can't create pb_pool");
+}
+
+#ifdef CONFIG_PROC_FS
+#define in_flight(var)	(var > var##_done ? var - var##_done : 0)
+
+/*
+ * Show I/O accounting statistics of the beancounter returned by
+ * seq_beancounter(@f); per-cpu counters are summed over all online CPUs.
+ */
+static int bc_ioacct_show(struct seq_file *f, void *v)
+{
+	int i;
+	unsigned long long read, write, cancel, rchar, wchar;
+	unsigned long sync, sync_done, fsync, fsync_done;
+	unsigned long fdsync, fdsync_done, frsync, frsync_done;
+	unsigned long reads, writes;
+	struct user_beancounter *ub;
+
+	ub = seq_beancounter(f);
+
+	read = write = cancel = rchar = wchar = 0;
+	sync = sync_done = fsync = fsync_done =
+		fdsync = fdsync_done = frsync = frsync_done = 0;
+	reads = writes = 0;
+	for_each_online_cpu(i) {
+		struct ub_percpu_struct *ub_percpu;
+		ub_percpu = per_cpu_ptr(ub->ub_percpu, i);
+
+		read += ub_percpu->bytes_read;
+		write += ub_percpu->bytes_wrote;
+		cancel += ub_percpu->bytes_cancelled;
+
+		sync += ub_percpu->sync;
+		fsync += ub_percpu->fsync;
+		fdsync += ub_percpu->fdsync;
+		frsync += ub_percpu->frsync;
+		sync_done += ub_percpu->sync_done;
+		fsync_done += ub_percpu->fsync_done;
+		fdsync_done += ub_percpu->fdsync_done;
+		frsync_done += ub_percpu->frsync_done;
+
+		reads += ub_percpu->read;
+		writes += ub_percpu->write;
+		rchar += ub_percpu->rchar;
+		wchar += ub_percpu->wchar;
+	}
+
+	seq_printf(f, bc_proc_llu_fmt, "read", read);
+	seq_printf(f, bc_proc_llu_fmt, "write", ub->bytes_wrote + write);
+	seq_printf(f, bc_proc_llu_fmt, "dirty", ub->bytes_dirtied);
+	seq_printf(f, bc_proc_llu_fmt, "cancel", cancel);
+	seq_printf(f, bc_proc_llu_fmt, "missed", ub->bytes_dirty_missed);
+
+	seq_printf(f, bc_proc_lu_lfmt, "syncs_total", sync);
+	seq_printf(f, bc_proc_lu_lfmt, "fsyncs_total", fsync);
+	seq_printf(f, bc_proc_lu_lfmt, "fdatasyncs_total", fdsync);
+	seq_printf(f, bc_proc_lu_lfmt, "range_syncs_total", frsync);
+
+	seq_printf(f, bc_proc_lu_lfmt, "syncs_active", in_flight(sync));
+	seq_printf(f, bc_proc_lu_lfmt, "fsyncs_active", in_flight(fsync));
+	seq_printf(f, bc_proc_lu_lfmt, "fdatasyncs_active", in_flight(fdsync));
+	seq_printf(f, bc_proc_lu_lfmt, "range_syncs_active", in_flight(frsync));
+
+	seq_printf(f, bc_proc_lu_lfmt, "vfs_reads", reads);
+	seq_printf(f, bc_proc_llu_fmt, "vfs_read_chars", rchar);
+	seq_printf(f, bc_proc_lu_lfmt, "vfs_writes", writes);
+	seq_printf(f, bc_proc_llu_fmt, "vfs_write_chars", wchar);
+
+	seq_printf(f, bc_proc_lu_lfmt, "io_pbs", ub->io_pb_held);
+	return 0;
+}
+
+static struct bc_proc_entry bc_ioacct_entry = {
+	.name = "ioacct",
+	.u.show = bc_ioacct_show,
+};
+
+#ifdef CONFIG_UBC_DEBUG_IO
+#define PTR_SIZE (int)(sizeof(void *) * 2)
+#define INT_SIZE (int)(sizeof(int) * 2)
+
+static int bc_io_show(struct seq_file *f, void *v)
+{
+	struct list_head *lh;
+	struct page_beancounter *pb;
+	struct page *pg;
+
+	lh = (struct list_head *)v;
+	if (lh == &pb_io_list) {
+		seq_printf(f, "Races: anon %lu missed %lu\n",
+				anon_pages, not_released);
+
+		seq_printf(f, "%-*s %-1s %-*s %-4s %*s %*s "
+				"%-*s %-*s %-1s %-*s %-*s\n",
+				PTR_SIZE, "pb", "",
+				PTR_SIZE, "page", "flg",
+				INT_SIZE, "cnt", INT_SIZE, "mcnt",
+				PTR_SIZE, "pb_list",
+				PTR_SIZE, "page_pb", "",
+				PTR_SIZE, "mapping",
+				INT_SIZE, "ub");
+		return 0;
+	}
+
+	pb = list_entry(lh, struct page_beancounter, io_list);
+	pg = pb->page;
+	seq_printf(f, "%p %c %p %c%c%c%c %*d %*d %p %p %c %p %d\n",
+			pb, pb->io_debug ? 'e' : 'm', pg,
+			PageDirty(pg) ? 'D' : 'd',
+			PageAnon(pg) ? 'A' : 'a',
+			PageWriteback(pg) ? 'W' : 'w',
+			PageLocked(pg) ? 'L' : 'l',
+			INT_SIZE, page_count(pg),
+			INT_SIZE, page_mapcount(pg),
+			pb->page_pb_list, page_pbc(pg),
+			iopb_to_pb(page_pbc(pg)) == pb ? ' ' : '!',
+			pg->mapping, pb->ub->ub_uid);
+	return 0;
+}
+
+static void *bc_io_start(struct seq_file *f, loff_t *ppos)
+{
+	loff_t pos;
+	struct list_head *lh;
+
+	pos = *ppos;
+	spin_lock(&pb_lock);
+	if (pos == 0)
+		return &pb_io_list;
+
+	list_for_each (lh, &pb_io_list)
+		if (pos-- == 1)
+			return lh;
+	return NULL;
+}
+
+static void *bc_io_next(struct seq_file *f, void *v, loff_t *ppos)
+{
+	struct list_head *lh;
+
+	(*ppos)++;
+	lh = (struct list_head *)v;
+	return lh->next == &pb_io_list ? NULL : lh->next;
+}
+
+static void bc_io_stop(struct seq_file *f, void *v)
+{
+	spin_unlock(&pb_lock);
+}
+
+static struct seq_operations bc_io_seq_ops = {
+	.start = bc_io_start,
+	.next  = bc_io_next,
+	.stop  = bc_io_stop,
+	.show  = bc_io_show,
+};
+
+static int bc_io_open(struct inode *inode, struct file *filp)
+{
+	if (!(capable(CAP_DAC_OVERRIDE) && capable(CAP_DAC_READ_SEARCH)))
+		return -EACCES;
+
+	return seq_open(filp, &bc_io_seq_ops);
+}
+static struct file_operations bc_io_debug_ops = {
+	.open		= bc_io_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct bc_proc_entry bc_ioacct_debug_entry = {
+	.name = "ioacct_debug",
+	.u.fops = &bc_io_debug_ops,
+};
+#endif
+
+static int bc_ioacct_notify(struct vnotifier_block *self,
+		unsigned long event, void *arg, int old_ret)
+{
+	struct user_beancounter *ub;
+	struct page_state *ps;
+	unsigned long long bin, bout;
+	int i;
+
+	if (event != VIRTINFO_VMSTAT)
+		return old_ret;
+
+	ub = top_beancounter(get_exec_ub());
+
+	/* Think over: do we need to account here bytes_dirty_missed? */
+	bout = ub->bytes_wrote;
+	bin = 0;
+	for_each_online_cpu(i) {
+		bout += per_cpu_ptr(ub->ub_percpu, i)->bytes_wrote;
+		bin += per_cpu_ptr(ub->ub_percpu, i)->bytes_read;
+	}
+
+	/* convert to Kbytes */
+	bout >>= 10;
+	bin >>= 10;
+
+	ps = (struct page_state *)arg;
+	ps->pgpgin = bin;
+	ps->pgpgout = bout;
+	return NOTIFY_OK;
+}
+
+static struct vnotifier_block bc_ioacct_nb = {
+	.notifier_call = bc_ioacct_notify,
+};
+
+static int __init bc_ioacct_init(void)
+{
+#ifdef CONFIG_UBC_DEBUG_IO
+	bc_register_proc_root_entry(&bc_ioacct_debug_entry);
+#endif
+	bc_register_proc_entry(&bc_ioacct_entry);
+
+	virtinfo_notifier_register(VITYPE_GENERAL, &bc_ioacct_nb);
+	return 0;
+}
+
+late_initcall(bc_ioacct_init);
+#endif
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/ub_dcache.c linux-2.6.16.46-0.12-027test011/kernel/ub/ub_dcache.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/ub_dcache.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/ub_dcache.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,674 @@
+/*
+ *  kernel/ub/ub_dcache.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/dcache.h>
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/fs.h>
+#include <linux/kmem_slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <linux/swap.h>
+#include <linux/stop_machine.h>
+#include <linux/cpumask.h>
+#include <linux/nmi.h>
+#include <linux/rwsem.h>
+#include <linux/rcupdate.h>
+#include <asm/bitops.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/ub_dcache.h>
+#include <ub/ub_dcache_op.h>
+
+/*
+ * Locking
+ *                          traverse  dcache_lock  d_lock
+ *        ub_dentry_charge   +         -            +
+ *      ub_dentry_uncharge   +         +            -
+ * ub_dentry_charge_nofail   +         +            -
+ *
+ * d_inuse changes are atomic, with special handling of "not in use" <->
+ * "in use" (-1 <-> 0) transitions.  We have two sources of non-atomicity
+ * here: (1) in many operations we need to change d_inuse of both dentry and
+ * its parent, and (2) on state transitions we need to adjust the account.
+ *
+ * Regarding (1): we do not have (and do not want) a single lock covering all
+ * operations, so in general it's impossible to get a consistent view of
+ * a tree with respect to d_inuse counters (except by swsuspend).  It also
+ * means if a dentry with d_inuse of 0 gets one new in-use child and loses
+ * one, its d_inuse counter will go either 0 -> 1 -> 0 path or 0 -> -1 -> 0,
+ * and we can't say which way.
+ * Note that path -1 -> 0 -> -1 can't turn into -1 -> -2 -> -1, since
+ * uncharge can be done only after return from charge (with d_genocide being
+ * the only apparent exception).
+ * Regarding (2): there is a similar uncertainty with the dcache account.
+ * If the account is equal to the limit, one more dentry is started to be
+ * used and one is put, the account will either hit the limit (and an error
+ * will be returned), or decrement will happen before increment.
+ *
+ * These races do not really matter.
+ * The only things we want are:
+ *  - if a system is suspended with no in-use dentries, all d_inuse counters
+ *    should be correct (-1);
+ *  - d_inuse counters should always be >= -1.
+ * This holds if ->parent references are accessed and maintained properly.
+ * In subtle moments (like d_move) dentries exchanging their parents should
+ * both be in-use.  At d_genocide time, lookups and charges are assumed to be
+ * impossible.
+ */
+
+/*
+ * Hierarchical accounting
+ * UB argument must NOT be NULL
+ */
+
+static int do_charge_dcache(struct user_beancounter *ub, unsigned long size,
+		enum ub_severity sv)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (__charge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size), sv))
+		goto out_mem;
+	if (__charge_beancounter_locked(ub, UB_DCACHESIZE, size, sv))
+		goto out_dcache;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return 0;
+
+out_dcache:
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
+out_mem:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return -ENOMEM;
+}
+
+static void do_uncharge_dcache(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
+	__uncharge_beancounter_locked(ub, UB_DCACHESIZE, size);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+static int charge_dcache(struct user_beancounter *ub, unsigned long size,
+		enum ub_severity sv)
+{
+	struct user_beancounter *p, *q;
+
+	for (p = ub; p != NULL; p = p->parent) {
+		if (do_charge_dcache(p, size, sv))
+			goto unroll;
+	}
+	return 0;
+
+unroll:
+	for (q = ub; q != p; q = q->parent)
+		do_uncharge_dcache(q, size);
+	return -ENOMEM;
+}
+
+void uncharge_dcache(struct user_beancounter *ub, unsigned long size)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_uncharge_dcache(ub, size);
+}
+
+/*
+ * Simple helpers to maintain the account and the d_ub field.
+ */
+
+static inline int d_charge(struct dentry_beancounter *d_bc)
+{
+	struct user_beancounter *ub;
+
+	ub = get_beancounter(get_exec_ub());
+	if (charge_dcache(ub, d_bc->d_ubsize, UB_SOFT)) {
+		put_beancounter(ub);
+		return -1;
+	}
+	d_bc->d_ub = ub;
+	return 0;
+}
+
+static inline void d_forced_charge(struct dentry_beancounter *d_bc)
+{
+	struct user_beancounter *ub;
+
+	ub = get_beancounter(get_exec_ub());
+	charge_dcache(ub, d_bc->d_ubsize, UB_FORCE);
+	d_bc->d_ub = ub;
+}
+
+/*
+ * Minor helpers
+ */
+
+extern kmem_cache_t *dentry_cache; 
+extern kmem_cache_t *inode_cachep;
+static struct rw_semaphore ub_dentry_alloc_sem;
+
+static inline unsigned int dentry_memusage(void)
+{
+	return dentry_cache->objuse;
+}
+
+static inline unsigned int inode_memusage(void)
+{
+	return inode_cachep->objuse;
+}
+
+static inline unsigned long d_charge_size(struct dentry *dentry)
+{
+	/* dentry's d_name is already set to appropriate value (see d_alloc) */
+	return inode_cachep->objuse + dentry_cache->objuse +
+		(dname_external(dentry) ?
+		 kmem_obj_memusage((void *)dentry->d_name.name) : 0);
+}
+
+/*
+ * Entry points from dcache.c
+ */
+
+/*
+ * Set initial d_inuse on d_alloc.
+ * Called with no locks, preemption disabled.
+ */
+int __ub_dentry_alloc(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+
+	d_bc = &dentry->dentry_bc;
+	d_bc->d_ub = get_beancounter(get_exec_ub());
+	atomic_set(&d_bc->d_inuse, INUSE_INIT); /* see comment in ub_dcache.h */
+	d_bc->d_ubsize = d_charge_size(dentry);
+
+	if (charge_dcache(d_bc->d_ub, d_bc->d_ubsize, UB_HARD))
+		goto failure;
+	return 0;
+
+failure:
+	put_beancounter(d_bc->d_ub);
+	d_bc->d_ub = NULL;
+	return -ENOMEM;
+}
+void __ub_dentry_alloc_start(void)
+{
+	down_read(&ub_dentry_alloc_sem);
+	current->task_bc.dentry_alloc = 1;
+}
+
+void __ub_dentry_alloc_end(void)
+{
+	current->task_bc.dentry_alloc = 0;
+	up_read(&ub_dentry_alloc_sem);
+}
+
+/*
+ * It is assumed that parent is already in use, so traverse upwards is
+ * limited to one ancestor only.
+ * Called under d_lock and rcu_read_lock.
+ */
+int __ub_dentry_charge(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+	struct dentry *parent;
+	int ret;
+
+	if (ub_dget_testone(dentry)) {
+		d_bc = &dentry->dentry_bc;
+		/* state transition -1 => 0 */
+		if (d_charge(d_bc))
+			goto failure;
+
+		if (dentry != dentry->d_parent) {
+			parent = dentry->d_parent;
+			if (ub_dget_testone(parent))
+				BUG();
+		}
+	}
+	return 0;
+
+failure:
+	/*
+	 * Here we would like to fail the lookup.
+	 * It is not easy: if d_lookup fails, callers expect that a dentry
+	 * with the given name doesn't exist, and create a new one.
+	 * So, first we forcedly charge for this dentry.
+	 * Then try to remove it from cache safely.  If it turns out to be
+	 * possible, we can return error.
+	 */
+	d_forced_charge(d_bc);
+
+	if (dentry != dentry->d_parent) {
+		parent = dentry->d_parent;
+		if (ub_dget_testone(parent))
+			BUG();
+	}
+
+	ret = 0;
+	if (spin_trylock(&dcache_lock)) {
+		if (!list_empty(&dentry->d_subdirs)) {
+			spin_unlock(&dentry->d_lock);
+			spin_unlock(&dcache_lock);
+			rcu_read_unlock();
+			shrink_dcache_parent(dentry);
+			rcu_read_lock();
+			spin_lock(&dcache_lock);
+			spin_lock(&dentry->d_lock);
+		}
+		if (atomic_read(&dentry->d_count) == 1) {
+			__d_drop(dentry);
+			ret = -1;
+		}
+		spin_unlock(&dcache_lock);
+	}
+
+	return ret;
+}
+
+/*
+ * Go up in the tree decreasing d_inuse.
+ * Called under dcache_lock.
+ */
+void __ub_dentry_uncharge(struct dentry *dentry)
+{
+	struct dentry *parent;
+	struct user_beancounter *ub;
+	unsigned long size;
+
+	/* go up until the state doesn't change or the root is reached */
+	size = dentry->dentry_bc.d_ubsize;
+	ub = dentry->dentry_bc.d_ub;
+	while (ub_dput_testzero(dentry)) {
+		/* state transition 0 => -1 */
+		uncharge_dcache(ub, size);
+		put_beancounter(ub);
+
+		parent = dentry->d_parent;
+		if (dentry == parent)	/* dentry is its own parent: stop */
+			break;
+
+		dentry = parent;
+		size = dentry->dentry_bc.d_ubsize;
+		ub = dentry->dentry_bc.d_ub;
+	}
+}
+
+/*
+ * Forced charge for __dget_locked, where API doesn't allow to return error.
+ * Called under dcache_lock.
+ */
+void __ub_dentry_charge_nofail(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	while (ub_dget_testone(dentry)) {
+		/* state transition -1 => 0 */
+		d_forced_charge(&dentry->dentry_bc);
+
+		parent = dentry->d_parent;
+		if (dentry == parent)
+			break;
+		dentry = parent;
+	}
+}
+
+/*
+ * Adaptive accounting
+ */
+
+int ub_dentry_on;
+int ub_dentry_alloc_barrier;
+EXPORT_SYMBOL(ub_dentry_on);
+
+static DEFINE_PER_CPU(int, checkcnt);
+static unsigned long checklowat = 0;
+static unsigned long checkhiwat = ULONG_MAX;
+
+static int sysctl_ub_dentry_chk = 10;
+#define sysctl_ub_lowat	sysctl_ub_watermark[0]
+#define sysctl_ub_hiwat sysctl_ub_watermark[1]
+static DECLARE_RWSEM(ub_dentry_alloc_sem);
+/* 1024th of lowmem size */
+static unsigned int sysctl_ub_watermark[2] = {0, 100};
+
+
+static int ub_dentry_acctinit(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+
+	d_bc = &dentry->dentry_bc;
+	d_bc->d_ub = NULL;
+	atomic_set(&d_bc->d_inuse, -1);
+	if (dname_external(dentry)) {
+		struct page *page;
+		page = virt_to_page(dentry->d_name.name);
+		if (!PageSlab(page) || page_get_cache(page) == NULL) {
+			printk("Problem with name, dentry %p, parent %p, "
+					"name %p len %d\n",
+					dentry, dentry->d_parent,
+					dentry->d_name.name,
+					dentry->d_name.len);
+			printk("   de %p name %.10s\n",
+					dentry, dentry->d_name.name);
+			d_bc->d_ubsize = 0;
+			return 0;
+		}
+	}
+	d_bc->d_ubsize = d_charge_size(dentry);
+	return 0;
+}
+
+static int ub_dentry_acctcount(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+	struct dentry *child;
+	int count;
+
+	count = 0;
+	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
+		count++;
+
+	d_bc = &dentry->dentry_bc;
+	count = atomic_read(&dentry->d_count) - count;
+	if (count) {
+		__ub_dentry_charge_nofail(dentry);
+		if (count > 1)
+			atomic_add(count - 1, &d_bc->d_inuse);
+	}
+
+	return 0;
+}
+
+static int ub_dentry_acctdrop(struct dentry *dentry)
+{
+	struct dentry_beancounter *d_bc;
+
+	d_bc = &dentry->dentry_bc;
+	if (atomic_read(&d_bc->d_inuse) < 0)
+		return 0;
+	atomic_set(&d_bc->d_inuse, -1);
+	uncharge_dcache(d_bc->d_ub, d_bc->d_ubsize);
+	put_beancounter(d_bc->d_ub);
+	return 0;
+}
+
+extern void kmem_cache_free_block(kmem_cache_t *cachep, void **objpp,
+		int nr_objects, int node);
+
+/*
+ * Call @fun for every allocated object of the dentry slab cache on @node.
+ * The walk stops at, and returns, the first non-zero result of @fun.
+ */
+static int ub_dentry_walk_node(int (*fun)(struct dentry *), int node)
+{
+	kmem_cache_t *cachep;
+	struct array_cache *ac;
+	struct slab *slabp;
+	char *objp;
+	int cpu, i, sz, r, n;
+	struct kmem_list3 *l3;
+	unsigned long map[PAGE_SIZE / sizeof(struct dentry)
+					/ BITS_PER_LONG + 1];
+
+	cachep = dentry_cache;
+	if (cachep->num >= sizeof(map) * 8)
+		return -E2BIG;	/* map can't describe one slab */
+
+	l3 = cachep->nodelists[node];
+	/* drain all CPU caches to have up-to-date free map */
+
+#ifdef CONFIG_NUMA
+	/* walk through all nodes and drain alien caches */
+	for_each_online_node (n) {
+		if (!cachep->nodelists[n]->alien)
+			continue;
+		ac = cachep->nodelists[n]->alien[node];
+		if (!ac)
+			continue;
+		kmem_cache_free_block(cachep, ac->entry, ac->avail, node);
+		ac->avail = 0;
+	}
+#endif
+
+	ac = l3->shared;
+	kmem_cache_free_block(cachep, ac->entry, ac->avail, node);
+	ac->avail = 0;
+	for_each_online_cpu(cpu) {
+		ac = cachep->array[cpu];
+		n = cpu_to_node(cpu);
+		kmem_cache_free_block(cachep, ac->entry, ac->avail, n);
+		ac->avail = 0;
+	}
+
+	list_for_each_entry(slabp, &l3->slabs_full, list) {
+		touch_nmi_watchdog();
+		for (i = 0, objp = slabp->s_mem; i < cachep->num;
+		     i++, objp += cachep->buffer_size) {
+#if SLAB_DEBUG
+			r = (*fun)((struct dentry *)
+					(objp + cachep->obj_offset));
+#else
+			r = (*fun)((struct dentry *)objp);
+#endif
+			if (r)
+				return r;
+		}
+	}
+
+	list_for_each_entry(slabp, &l3->slabs_partial, list) {
+		touch_nmi_watchdog();
+		memset(map, 0xff, sizeof(map));
+		for (i = slabp->free, r = 0; i != BUFCTL_END;
+		     i = slab_bufctl(slabp)[i], r++) {
+			if (r > cachep->num)
+				return -1;	/* free list longer than a slab */
+			__clear_bit(i, map);
+		}
+		sz = sizeof(map) * 8;	/* size of map in bits */
+		for (i = find_first_bit(map, sz); i < cachep->num;
+		     i = find_next_bit(map, sz, i + 1)) {
+			objp = slabp->s_mem + i * cachep->buffer_size;
+#if SLAB_DEBUG
+			r = (*fun)((struct dentry *)
+					(objp + cachep->obj_offset));
+#else
+			r = (*fun)((struct dentry *)objp);
+#endif
+			if (r)
+				return r;
+		}
+	}
+
+	return 0;
+}
+
+static int ub_dentry_walk(int (*fun)(struct dentry *))
+{
+	int node;
+	int err;
+
+	for_each_online_node (node) {
+		if ((err = ub_dentry_walk_node(fun, node)) != 0)
+			return err;
+	}
+	return 0;
+}
+
+static int ub_dentry_accton(void *data)
+{
+	struct user_beancounter *ub;
+	int err;
+
+	ub = get_exec_ub();
+	set_exec_ub(get_ub0());
+	err = ub_dentry_walk(&ub_dentry_acctinit);
+	if (!err)
+		err = ub_dentry_walk(&ub_dentry_acctcount);
+	set_exec_ub(ub);
+	if (err == 0)
+		ub_dentry_on = 1;
+	return err;
+}
+
+static int ub_dentry_acctoff(void *data)
+{
+	int ret;
+	ret = ub_dentry_walk(&ub_dentry_acctdrop);
+	if (ret == 0)
+		ub_dentry_on = 0;
+	return ret;
+}
+
+/*
+ * Main function turning dcache accounting on and off.
+ * Called with preemption disabled (for caller's convenience).
+ */
+static void ub_dentry_switch(int onoff, unsigned long pages, int (*fun)(void *))
+{
+	static char *s[] = { "off", "on" };
+	unsigned long start_jiffies;
+	int err, tm;
+
+	start_jiffies = jiffies;
+	preempt_enable();
+	ub_dentry_alloc_barrier = 1;
+	/* ensure ub_dentry_alloc_barrier is visible on all CPUs */
+	mb();
+	synchronize_rcu();
+	down_write(&ub_dentry_alloc_sem);
+	if (ub_dentry_on == onoff)
+		goto done;
+
+	printk("UBC: preparing to turn dcache accounting %s, "
+			"size %lu pages, watermarks %lu %lu\n",
+			s[onoff], pages, checklowat, checkhiwat);
+	err = stop_machine_run(fun, NULL, NR_CPUS);
+	if (err) {
+		printk(KERN_ERR "UBC: ERROR: dcache accounting switch %d\n",
+				err);
+		preempt_disable();
+		checklowat = 0;
+		checkhiwat = ULONG_MAX;
+		sysctl_ub_dentry_chk = INT_MAX;
+		preempt_enable();
+	} else {
+		tm = jiffies_to_msecs(jiffies - start_jiffies);
+		printk("UBC: turning dcache accounting %s succeeded, "
+				"usage %lu, time %u.%03u\n",
+				s[onoff],
+				get_ub0()->ub_parms[UB_DCACHESIZE].held,
+				tm / 1000, tm % 1000);
+	}
+
+done:
+	ub_dentry_alloc_barrier = 0;
+	up_write(&ub_dentry_alloc_sem);
+	preempt_disable();
+}
+
+void ub_dentry_checkup(void)
+{
+	int *p;
+	unsigned long pages;
+
+	preempt_disable();
+	p = &__get_cpu_var(checkcnt);
+	if (++*p > sysctl_ub_dentry_chk) {
+		*p = 0;
+		pages = dentry_cache->grown
+			- dentry_cache->reaped
+			- dentry_cache->shrunk;
+		pages <<= dentry_cache->gfporder;
+		if (ub_dentry_on) {
+			if (pages < checklowat)
+				ub_dentry_switch(0, pages, &ub_dentry_acctoff);
+		} else {
+			if (pages >= checkhiwat)
+				ub_dentry_switch(1, pages, &ub_dentry_accton);
+		}
+	}
+	preempt_enable();
+}
+
+static void ub_dentry_set_limits(unsigned long pages, unsigned long cap)
+{
+	down_write(&ub_dentry_alloc_sem);
+	preempt_disable();
+	checklowat = (pages >> 10) * sysctl_ub_lowat;
+	checkhiwat = (pages >> 10) * sysctl_ub_hiwat;
+	if (checkhiwat > cap) {
+		checkhiwat = cap;
+		checklowat = cap / sysctl_ub_hiwat * sysctl_ub_lowat;
+	}
+	preempt_enable();
+	up_write(&ub_dentry_alloc_sem);
+}
+
+static int ub_dentry_proc_handler(ctl_table *ctl, int write, struct file *filp,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int r;
+
+	r = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+	if (!r && write)
+		ub_dentry_set_limits(totalram_pages - totalhigh_pages,
+				ULONG_MAX);
+	return r;
+}
+
+static ctl_table ub_dentry_sysctl_table[] = {
+	{
+		.ctl_name	= 1000,
+		.procname	= "dentry_check",
+		.data		= &sysctl_ub_dentry_chk,
+		.maxlen		= sizeof(sysctl_ub_dentry_chk),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= 1001,
+		.procname	= "dentry_watermark",
+		.data		= &sysctl_ub_lowat,
+		.maxlen		= sizeof(sysctl_ub_lowat) * 2,
+		.mode		= 0644,
+		.proc_handler	= &ub_dentry_proc_handler,
+	},
+	{ .ctl_name = 0 }
+};
+static ctl_table ub_dentry_sysctl_root[] = {
+	{
+		.ctl_name	= 23681,
+		.procname	= "ubc",
+		.mode		= 0555,
+		.child		= ub_dentry_sysctl_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static int __init ub_dentry_init(void)
+{
+	/*
+	 * Initial watermarks are limited, to limit walk time.
+	 * 384MB translates into 0.8 sec on PIII 866MHz.
+	 */
+	ub_dentry_set_limits(totalram_pages - totalhigh_pages,
+			384 * 1024 * 1024 / PAGE_SIZE);
+	if (register_sysctl_table(ub_dentry_sysctl_root, 0) == NULL)
+		return -ENOMEM;
+	return 0;
+}
+__initcall(ub_dentry_init);
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/ub_mem.c linux-2.6.16.46-0.12-027test011/kernel/ub/ub_mem.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/ub_mem.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/ub_mem.c	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,446 @@
+/*
+ *  kernel/ub/ub_mem.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/kmem_slab.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/swap.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/ub_page.h>
+#include <ub/ub_hash.h>
+#include <ub/proc.h>
+
+/*
+ * Initialization
+ */
+
+/*
+ * Slab accounting
+ */
+
+#ifdef CONFIG_UBC_DEBUG_KMEM
+
+#define CC_HASH_SIZE	1024	/* must stay a power of two: cc_hash_fun() masks with CC_HASH_SIZE - 1 */
+static struct ub_cache_counter *cc_hash[CC_HASH_SIZE];	/* buckets, chained through ->next */
+spinlock_t cc_lock;	/* taken around every cc_hash and ub_cclist access in this file */
+
+static void __free_cache_counters(struct user_beancounter *ub,
+		kmem_cache_t *cachep)
+{
+	struct ub_cache_counter *cc, **pprev, *del;
+	int i;
+	unsigned long flags;
+	/* Unhash and free every counter of 'ub' or of 'cachep'; callers pass NULL for the key they do not use. */
+	del = NULL;
+	spin_lock_irqsave(&cc_lock, flags);
+	for (i = 0; i < CC_HASH_SIZE; i++) {
+		pprev = &cc_hash[i];
+		cc = cc_hash[i];
+		while (cc != NULL) {
+			if (cc->ub != ub && cc->cachep != cachep) {	/* matches neither key: keep it */
+				pprev = &cc->next;
+				cc = cc->next;
+				continue;
+			}
+			/* unlink from the ub list and the hash chain, then park on the private 'del' list */
+			list_del(&cc->ulist);
+			*pprev = cc->next;
+			cc->next = del;
+			del = cc;
+			cc = *pprev;
+		}
+	}
+	spin_unlock_irqrestore(&cc_lock, flags);
+	/* the collected entries are freed only after cc_lock has been dropped */
+	while (del != NULL) {
+		cc = del->next;
+		kfree(del);
+		del = cc;
+	}
+}
+
+void ub_free_counters(struct user_beancounter *ub)
+{
+	__free_cache_counters(ub, NULL);	/* drop every per-cache counter that belongs to 'ub' */
+}
+
+void ub_kmemcache_free(kmem_cache_t *cachep)
+{
+	__free_cache_counters(NULL, cachep);	/* drop every beancounter's counter for 'cachep' */
+}
+
+void __init init_cache_counters(void)
+{
+	spin_lock_init(&cc_lock);		/* lock guarding the hash below */
+	memset(cc_hash, 0, sizeof(cc_hash));	/* every bucket starts empty */
+}
+
+#define cc_hash_fun(ub, cachep)	(	/* bucket index for a (ub, cachep) pointer pair */ \
+	(((unsigned long)(ub) >> L1_CACHE_SHIFT) ^		\
+	 ((unsigned long)(ub) >> (BITS_PER_LONG / 2)) ^		\
+	 ((unsigned long)(cachep) >> L1_CACHE_SHIFT) ^		\
+	 ((unsigned long)(cachep) >> (BITS_PER_LONG / 2))	\
+	) & (CC_HASH_SIZE - 1))
+
+static int change_slab_charged(struct user_beancounter *ub,
+		kmem_cache_t *cachep, long val)
+{
+	struct ub_cache_counter *cc, *new_cnt, **pprev;
+	unsigned long flags;
+	/* Add 'val' to the (ub, cachep) counter, creating it on first use; returns 0 or -ENOMEM. */
+	new_cnt = NULL;
+again:
+	spin_lock_irqsave(&cc_lock, flags);
+	cc = cc_hash[cc_hash_fun(ub, cachep)];
+	while (cc) {
+		if (cc->ub == ub && cc->cachep == cachep)
+			goto found;
+		cc = cc->next;
+	}
+
+	if (new_cnt != NULL)
+		goto insert;
+	/* not hashed yet: allocate with the lock dropped, then search again */
+	spin_unlock_irqrestore(&cc_lock, flags);
+
+	new_cnt = kmalloc(sizeof(*new_cnt), GFP_ATOMIC);
+	if (new_cnt == NULL)
+		return -ENOMEM;
+
+	new_cnt->counter = 0;
+	new_cnt->ub = ub;
+	new_cnt->cachep = cachep;
+	goto again;
+
+insert:
+	pprev = &cc_hash[cc_hash_fun(ub, cachep)];
+	new_cnt->next = *pprev;
+	*pprev = new_cnt;
+	list_add(&new_cnt->ulist, &ub->ub_cclist);
+	cc = new_cnt;
+	new_cnt = NULL;
+
+found:
+	cc->counter += val;
+	spin_unlock_irqrestore(&cc_lock, flags);
+	/* a spare remains only if the entry appeared while unlocked; kfree(NULL) is a no-op */
+	kfree(new_cnt);
+	return 0;
+}
+
+static inline int inc_slab_charged(struct user_beancounter *ub,
+	kmem_cache_t *cachep)
+{
+	return change_slab_charged(ub, cachep, 1);	/* one more object; -ENOMEM if the counter cannot be created */
+}
+
+static inline void dec_slab_charged(struct user_beancounter *ub,
+	kmem_cache_t *cachep)
+{
+	if (change_slab_charged(ub, cachep, -1) < 0)	/* fails only when a counter had to be allocated */
+		BUG();
+}
+
+#include <linux/vmalloc.h>
+
+#define inc_pages_charged(ub, order)	ub_percpu_add(ub, /* adds 1 << order to pages_charged */ \
+					pages_charged, 1 << (order))
+#define dec_pages_charged(ub, order)	ub_percpu_sub(ub, /* argument parenthesized: safe for expressions */ \
+					pages_charged, 1 << (order))
+
+#ifdef CONFIG_PROC_FS
+static int bc_kmem_debug_show(struct seq_file *f, void *v)
+{
+	struct user_beancounter *ub;
+	struct ub_cache_counter *cc;
+	long pages, vmpages, pbc;
+	int i;
+	/* Print the beancounter's page, vmalloc and pbc totals, then one line per slab cache counter. */
+	ub = seq_beancounter(f);
+
+	pages = vmpages = pbc = 0;
+	for_each_online_cpu(i) {	/* totals are sums of the per-cpu counters */
+		pages += per_cpu_ptr(ub->ub_percpu, i)->pages_charged;
+		vmpages += per_cpu_ptr(ub->ub_percpu, i)->vmalloc_charged;
+		pbc += per_cpu_ptr(ub->ub_percpu, i)->pbcs;
+	}
+	if (pages < 0)	/* negative sums are shown as zero */
+		pages = 0;
+	if (vmpages < 0)
+		vmpages = 0;
+	/* each line is: name, count, size of one unit */
+	seq_printf(f, bc_proc_lu_lu_fmt, "pages", pages, PAGE_SIZE);
+	seq_printf(f, bc_proc_lu_lu_fmt, "vmalloced", vmpages, PAGE_SIZE);
+	seq_printf(f, bc_proc_lu_lu_fmt, "pbcs", pbc,
+			sizeof(struct page_beancounter));
+
+	spin_lock_irq(&cc_lock);	/* the ub_cclist walk is done under cc_lock */
+	list_for_each_entry (cc, &ub->ub_cclist, ulist) {
+		kmem_cache_t *cachep;
+
+		cachep = cc->cachep;
+		seq_printf(f, bc_proc_lu_lu_fmt,
+				cachep->name, cc->counter,
+				(unsigned long)cachep->objuse);
+	}
+	spin_unlock_irq(&cc_lock);
+	return 0;
+}
+
+static struct bc_proc_entry bc_kmem_debug_entry = {
+	.name = "kmem_debug",	/* entry name passed to bc_register_proc_entry() */
+	.u.show = bc_kmem_debug_show,
+};
+
+static int __init bc_kmem_debug_init(void)
+{
+	bc_register_proc_entry(&bc_kmem_debug_entry);	/* any result of the registration is not checked */
+	return 0;
+}
+
+late_initcall(bc_kmem_debug_init);
+#endif
+
+#else
+#define inc_slab_charged(ub, cache)		(0)	/* debug accounting compiled out: always succeeds */
+#define dec_slab_charged(ub, cache)		do { } while (0)
+#define inc_pages_charged(ub, order) 		(0)	/* second argument is the page order, as in the debug variant */
+#define dec_pages_charged(ub, order)		do { } while (0)
+#endif
+
+static inline struct user_beancounter **slab_ub_ref(kmem_cache_t *cachep,
+		void *objp)
+{
+	struct slab *slab;
+	int idx;
+
+	BUG_ON(!(cachep->flags & SLAB_UBC));	/* callers must pass a SLAB_UBC cache */
+	slab = virt_to_slab(objp);
+	idx = (objp - slab->s_mem) / cachep->buffer_size;	/* object index inside its slab */
+	return slab_ubcs(cachep, slab) + idx;
+}
+
+struct user_beancounter *slab_ub(void *objp)
+{
+	/* Find the cache that owns objp, then read the object's owner slot. */
+	kmem_cache_t *cachep = virt_to_cache(objp);
+
+	return *slab_ub_ref(cachep, objp);
+}
+
+EXPORT_SYMBOL(slab_ub);
+
+#define UB_KMEM_QUANT	(PAGE_SIZE * 4)	/* bytes charged at once to refill a task's kmemsize reserve */
+
+/* called with IRQ disabled */
+static int ub_kmemsize_charge(struct user_beancounter *ub,
+		unsigned long size,
+		enum ub_severity strict)
+{
+	struct task_beancounter *tbc;
+	/* Charge 'size' bytes of UB_KMEMSIZE, served from the current task's precharged reserve when possible. */
+	tbc = &current->task_bc;
+	if (ub != tbc->task_ub || size > UB_KMEM_QUANT)	/* reserve serves only the task's own ub and small sizes */
+		goto just_charge;
+	if (tbc->kmem_precharged >= size) {	/* fast path: take it from the reserve */
+		tbc->kmem_precharged -= size;
+		return 0;
+	}
+	/* reserve too small: charge a whole quant and keep the remainder precharged */
+	if (charge_beancounter(ub, UB_KMEMSIZE, UB_KMEM_QUANT, UB_HARD) == 0) {
+		tbc->kmem_precharged += UB_KMEM_QUANT - size;
+		return 0;
+	}
+	/* the quant charge failed: fall back to charging exactly 'size' */
+just_charge:
+	return charge_beancounter(ub, UB_KMEMSIZE, size, strict);
+}
+
+/* called with IRQ disabled */
+static void ub_kmemsize_uncharge(struct user_beancounter *ub,
+		unsigned long size)
+{
+	struct task_beancounter *tbc;
+	/* Return 'size' bytes of UB_KMEMSIZE, parking them in the current task's reserve when possible. */
+	if (size > UB_MAXVALUE) {	/* out-of-range size: report it, then carry on */
+		printk("ub_kmemsize_uncharge: size %lu\n", size);
+		dump_stack();
+	}
+
+	tbc = &current->task_bc;
+	if (ub != tbc->task_ub)
+		goto just_uncharge;
+
+	tbc->kmem_precharged += size;
+	if (tbc->kmem_precharged < UB_KMEM_QUANT * 2)
+		return;
+	size = tbc->kmem_precharged - UB_KMEM_QUANT;	/* reserve reached two quants: trim it back to one */
+	tbc->kmem_precharged -= size;
+
+just_uncharge:
+	uncharge_beancounter(ub, UB_KMEMSIZE, size);
+}
+
+static inline int should_charge(kmem_cache_t *cachep, gfp_t flags)
+{
+	/* Only SLAB_UBC caches are accounted at all ... */
+	/* ... and SLAB_NO_CHARGE ones only when __GFP_UBC is passed explicitly. */
+	return (cachep->flags & SLAB_UBC) &&
+		(!(cachep->flags & SLAB_NO_CHARGE) || (flags & __GFP_UBC));
+}
+
+/* Uncharge has no gfp flags: decide as if __GFP_UBC had been passed. */
+#define should_uncharge(cachep)	should_charge(cachep, __GFP_UBC)
+
+/* called with IRQ disabled */
+int ub_slab_charge(kmem_cache_t *cachep, void *objp, gfp_t flags)
+{
+	unsigned int size;
+	struct user_beancounter *ub;
+	/* Charge one object of 'cachep' to the exec beancounter; 0 on success or nothing to do, else -ENOMEM. */
+	if (!should_charge(cachep, flags))
+		return 0;
+
+	ub = get_beancounter(get_exec_ub());	/* on success this reference stays in the object's slot */
+	if (ub == NULL)
+		return 0;
+
+	size = CHARGE_SIZE(cachep->objuse);
+	if (ub_kmemsize_charge(ub, size,
+				(flags & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
+		goto out_err;
+
+	if (inc_slab_charged(ub, cachep) < 0) {	/* debug counter failed: undo the kmemsize charge */
+		ub_kmemsize_uncharge(ub, size);
+		goto out_err;
+	}
+	*slab_ub_ref(cachep, objp) = ub;
+	return 0;
+
+out_err:
+	put_beancounter(ub);
+	return -ENOMEM;
+}
+
+/* called with IRQ disabled */
+void ub_slab_uncharge(kmem_cache_t *cachep, void *objp)
+{
+	struct user_beancounter **slot, *owner;
+	unsigned int size;
+
+	if (!should_uncharge(cachep))
+		return;
+	slot = slab_ub_ref(cachep, objp);
+	owner = *slot;
+	if (owner == NULL)
+		return;		/* no owner recorded for this object */
+
+	dec_slab_charged(owner, cachep);
+	size = CHARGE_SIZE(cachep->objuse);
+	ub_kmemsize_uncharge(owner, size);
+	put_beancounter(owner);
+	*slot = NULL;
+}
+
+/*
+ * Pages accounting
+ */
+
+int ub_page_charge(struct page *page, int order, gfp_t mask)
+{
+	struct user_beancounter *ub;
+	unsigned long flags;
+	/* Charge an order-'order' page allocation to the exec beancounter and record the owner on the page. */
+	ub = NULL;
+	if (!(mask & __GFP_UBC))
+		goto out;	/* not accounted: the page gets a NULL owner */
+
+	ub = get_beancounter(get_exec_ub());
+	if (ub == NULL)
+		goto out;
+
+	local_irq_save(flags);	/* ub_kmemsize_charge() is called with IRQs disabled */
+	if (ub_kmemsize_charge(ub, CHARGE_ORDER(order),
+				(mask & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
+		goto err;
+
+	inc_pages_charged(ub, order);
+	local_irq_restore(flags);
+out:
+	BUG_ON(page_ub(page) != NULL);
+	page_ub(page) = ub;
+	return 0;
+
+err:
+	local_irq_restore(flags);
+	BUG_ON(page_ub(page) != NULL);
+	put_beancounter(ub);
+	return -ENOMEM;
+}
+
+void ub_page_uncharge(struct page *page, int order)
+{
+	struct user_beancounter *ub;
+	unsigned long flags;
+	/* Undo ub_page_charge() for a page allocation of the given order. */
+	ub = page_ub(page);
+	if (ub == NULL)
+		return;	/* the page has no owner recorded */
+
+	BUG_ON(ub->ub_magic != UB_MAGIC);	/* the recorded owner must still carry the magic value */
+	dec_pages_charged(ub, order);
+	local_irq_save(flags);
+	ub_kmemsize_uncharge(ub, CHARGE_ORDER(order));
+	local_irq_restore(flags);
+	put_beancounter(ub);
+	page_ub(page) = NULL;
+}
+
+/*
+ * Takes init_mm.page_table_lock.
+ * Some outer lock protecting the pages of the vmalloced area must be held.
+ */
+struct user_beancounter *vmalloc_ub(void *obj)
+{
+	struct page *page = vmalloc_to_page(obj);
+
+	/* No backing page: there is no owner to report. */
+	if (!page)
+		return NULL;
+	/* Otherwise report the beancounter recorded on that page. */
+	return page_ub(page);
+}
+
+EXPORT_SYMBOL(vmalloc_ub);
+
+/* Look up the beancounter recorded for a vmalloc'ed or slab-allocated object. */
+struct user_beancounter *mem_ub(void *obj)
+{
+	unsigned long addr = (unsigned long)obj;
+
+	/* Addresses inside the vmalloc window are resolved through their page. */
+	if (addr >= VMALLOC_START && addr < VMALLOC_END)
+		return vmalloc_ub(obj);
+
+	/* Anything else is treated as a slab object. */
+	return slab_ub(obj);
+}
+
+EXPORT_SYMBOL(mem_ub);
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/ub_misc.c linux-2.6.16.46-0.12-027test011/kernel/ub/ub_misc.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/ub_misc.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/ub_misc.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,454 @@
+/*
+ *  kernel/ub/ub_misc.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/kmem_cache.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
+#include <ub/proc.h>
+
+#define UB_FILE_MINQUANT	3	/* lower bound of file_quant (log2 of the batch size) */
+#define UB_FILE_MAXQUANT	10	/* upper bound of file_quant */
+#define UB_FILE_INIQUANT	4	/* file_quant set by ub_init_task_bc() */
+
+static unsigned long ub_file_precharge(struct task_beancounter *task_bc,
+		struct user_beancounter *ub, unsigned long *kmemsize);
+
+static inline unsigned long ub_file_kmemsize(unsigned long nr)
+{
+	return CHARGE_SIZE(filp_cachep->objuse) * nr;	/* kmemsize charge for 'nr' objects of filp_cachep */
+}
+
+/*
+ * Task stuff
+ */
+
+static void init_task_sub(struct task_struct *parent,
+		struct task_struct *tsk,
+  		struct task_beancounter *old_bc)
+{
+	struct task_beancounter *new_bc;
+	struct user_beancounter *sub;
+	/* Fill the fork-time fields of tsk's task_bc from old_bc; 'parent' itself is not used here. */
+	new_bc = &tsk->task_bc;
+	sub = old_bc->fork_sub;
+	new_bc->fork_sub = get_beancounter(sub);
+	new_bc->task_fnode = NULL;
+	new_bc->task_freserv = old_bc->task_freserv;	/* the reservation moves from old_bc to the new task */
+	old_bc->task_freserv = NULL;
+	memset(&new_bc->task_data, 0, sizeof(new_bc->task_data));
+	new_bc->pgfault_handle = 0;
+	new_bc->pgfault_allot = 0;
+}
+
+void ub_init_task_bc(struct task_beancounter *tbc)
+{
+	tbc->file_precharged = 0;	/* no files reserved yet */
+	tbc->file_quant = UB_FILE_INIQUANT;
+	tbc->file_count = 0;
+	/* kmem_precharged and dentry_alloc start from zero as well */
+	tbc->kmem_precharged = 0;
+	tbc->dentry_alloc = 0;
+}
+
+int ub_task_charge(struct task_struct *parent, struct task_struct *task)
+{
+	struct task_beancounter *old_bc;
+	struct task_beancounter *new_bc;
+	struct user_beancounter *ub, *pub;
+	unsigned long file_nr, kmemsize;
+	unsigned long flags;
+	/* Charge a new task to the parent's fork_sub: one UB_NUMPROC unit plus an initial file precharge. */
+	old_bc = &parent->task_bc;
+	ub = old_bc->fork_sub;
+	new_bc = &task->task_bc;
+	new_bc->task_ub = get_beancounter(ub);
+	new_bc->exec_ub = get_beancounter(ub);
+
+	pub = top_beancounter(ub);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+	if (unlikely(__charge_beancounter_locked(pub, UB_NUMPROC,
+					1, UB_HARD) < 0))
+		goto out_numproc;
+
+	ub_init_task_bc(new_bc);
+	file_nr = ub_file_precharge(new_bc, pub, &kmemsize);	/* 0 when nothing was precharged */
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+	/* same amounts again via *_notop -- presumably for the levels below the top one; confirm */
+	charge_beancounter_notop(ub, UB_NUMPROC, 1);
+	if (likely(file_nr)) {
+		charge_beancounter_notop(ub, UB_NUMFILE, file_nr);
+		charge_beancounter_notop(ub, UB_KMEMSIZE, kmemsize);
+	}
+
+	init_task_sub(parent, task, old_bc);
+	return 0;
+
+out_numproc:
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+	__put_beancounter_batch(ub, 2);	/* drop the task_ub and exec_ub references taken above */
+	return -ENOMEM;
+}
+
+extern atomic_t dbgpre;
+
+void ub_task_uncharge(struct task_struct *task)
+{
+	struct task_beancounter *task_bc;
+	struct user_beancounter *pub;
+	unsigned long file_nr, file_kmemsize;
+	unsigned long flags;
+	/* Give back the task's UB_NUMPROC unit and whatever file precharge it still holds. */
+	task_bc = &task->task_bc;
+	pub = top_beancounter(task_bc->task_ub);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+	__uncharge_beancounter_locked(pub, UB_NUMPROC, 1);
+	file_nr = task_bc->file_precharged;
+	if (likely(file_nr))
+		__uncharge_beancounter_locked(pub,
+				UB_NUMFILE, file_nr);
+
+	/* see comment in ub_file_charge */
+	task_bc->file_precharged = 0;
+	file_kmemsize = ub_file_kmemsize(file_nr);
+	if (likely(file_kmemsize))
+		__uncharge_beancounter_locked(pub,
+				UB_KMEMSIZE, file_kmemsize);
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+	/* same amounts again through the *_notop helpers, outside the top-level lock */
+	uncharge_beancounter_notop(task_bc->task_ub, UB_NUMPROC, 1);
+	if (likely(file_nr)) {
+		uncharge_beancounter_notop(task_bc->task_ub,
+				UB_NUMFILE, file_nr);
+		__put_beancounter_batch(task_bc->task_ub, file_nr);	/* one reference per precharged file */
+	}
+	if (likely(file_kmemsize))
+		uncharge_beancounter_notop(task_bc->task_ub,
+				UB_KMEMSIZE, file_kmemsize);
+}
+
+void ub_task_put(struct task_struct *task)
+{
+	struct task_beancounter *task_bc;
+	struct user_beancounter *pub;
+	unsigned long kmemsize, flags;
+	/* Final teardown of a task's beancounter state: return the kmemsize reserve, drop all references. */
+	task_bc = &task->task_bc;
+
+	pub = top_beancounter(task_bc->task_ub);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+	kmemsize = task_bc->kmem_precharged;
+	task_bc->kmem_precharged = 0;
+	if (likely(kmemsize))
+		__uncharge_beancounter_locked(pub, UB_KMEMSIZE, kmemsize);
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+	if (likely(kmemsize))
+		uncharge_beancounter_notop(task_bc->task_ub, UB_KMEMSIZE, kmemsize);
+
+	put_beancounter(task_bc->exec_ub);
+	put_beancounter(task_bc->task_ub);
+	put_beancounter(task_bc->fork_sub);
+	/* can't be freed elsewhere, failures possible in the middle of fork */
+	/* (kfree() ignores NULL, so no explicit check is needed) */
+	kfree(task_bc->task_freserv);
+
+	task_bc->exec_ub = (struct user_beancounter *)0xdeadbcbc;	/* poison values make later use recognizable */
+	task_bc->task_ub = (struct user_beancounter *)0xdead100c;
+	BUG_ON(task_bc->kmem_precharged != 0);
+}
+
+/*
+ * Files and file locks.
+ */
+/*
+ * For NUMFILE, we do not take a lock and call charge function
+ * for every file.  We try to charge in batches, keeping local reserve on
+ * task.  For experimental purposes, batch size is adaptive and depends
+ * on numfile barrier, number of processes, and the history of successes and
+ * failures of batch charges.
+ *
+ * Per-task fields have the following meaning
+ *   file_precharged    number of files charged to beancounter in advance,
+ *   file_quant         logarithm of batch size
+ *   file_count         counter of charge successes, to reduce batch size
+ *                      fluctuations.
+ */
+static unsigned long ub_file_precharge(struct task_beancounter *task_bc,
+		struct user_beancounter *ub, unsigned long *kmemsize)
+{
+	unsigned long n, kmem;
+	/* Callers hold ub->ub_lock. Try to reserve 2^file_quant files; returns the batch size, or 0. */
+	n = 1UL << task_bc->file_quant;
+	if (ub->ub_parms[UB_NUMPROC].held >
+			(ub->ub_parms[UB_NUMFILE].barrier >>
+						task_bc->file_quant))
+		goto nopre;	/* too many processes for batches of this size */
+	if (unlikely(__charge_beancounter_locked(ub, UB_NUMFILE, n, UB_HARD)))
+		goto nopre;
+	kmem = ub_file_kmemsize(n);
+	if (unlikely(__charge_beancounter_locked(ub, UB_KMEMSIZE,
+					kmem, UB_HARD)))
+		goto nopre_kmem;
+
+	task_bc->file_precharged += n;
+	get_beancounter_batch(task_bc->task_ub, n);	/* one reference per reserved file */
+	task_bc->file_count++;
+	if (task_bc->file_quant < UB_FILE_MAXQUANT &&
+	    task_bc->file_count >= task_bc->file_quant) {	/* enough successes: grow the batch */
+		task_bc->file_quant++;
+		task_bc->file_count = 0;
+	}
+	*kmemsize = kmem;
+	return n;
+
+nopre_kmem:
+	__uncharge_beancounter_locked(ub, UB_NUMFILE, n);
+nopre:
+	if (task_bc->file_quant > UB_FILE_MINQUANT)	/* failure: shrink the batch; *kmemsize stays unset */
+		task_bc->file_quant--;
+	task_bc->file_count = 0;
+	return 0;
+}
+
+int ub_file_charge(struct file *f)
+{
+	struct user_beancounter *ub, *pub;
+	struct task_beancounter *task_bc;
+	unsigned long file_nr, kmem;
+	unsigned long flags;
+	int err;
+	/* Charge one file to the exec beancounter, from the task's precharged batch when possible. */
+	task_bc = &current->task_bc;
+	ub = get_exec_ub();
+	if (unlikely(ub != task_bc->task_ub))
+		goto just_charge;	/* the reserve only covers the task's own beancounter */
+
+	if (likely(task_bc->file_precharged > 0)) {
+		/*
+		 * files are put via RCU in 2.6.16 so during
+		 * this decrement an IRQ can happen and called
+		 * ub_files_uncharge() will mess file_precharged
+		 *
+		 * ub_task_uncharge() is called via RCU also so no
+		 * protection is needed there
+		 *
+		 * Xemul
+		 */
+
+		local_irq_save(flags);
+		task_bc->file_precharged--;
+		local_irq_restore(flags);
+
+		f->f_ub = ub;	/* no get here: references were taken in batch at precharge time */
+		return 0;
+	}
+
+	pub = top_beancounter(ub);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+	file_nr = ub_file_precharge(task_bc, pub, &kmem);
+	if (unlikely(!file_nr))
+		goto last_try;	/* still holding pub->ub_lock */
+	spin_unlock(&pub->ub_lock);
+	task_bc->file_precharged--;	/* consume one file of the new batch while IRQs are still off */
+	local_irq_restore(flags);
+
+	charge_beancounter_notop(ub, UB_NUMFILE, file_nr);
+	charge_beancounter_notop(ub, UB_KMEMSIZE, kmem);
+	f->f_ub = ub;
+	return 0;
+
+just_charge:
+	pub = top_beancounter(ub);
+	spin_lock_irqsave(&pub->ub_lock, flags);
+last_try:
+	kmem = ub_file_kmemsize(1);	/* no batch: charge exactly one file */
+	err = __charge_beancounter_locked(pub, UB_NUMFILE, 1, UB_HARD);
+	if (likely(!err)) {
+		err = __charge_beancounter_locked(pub, UB_KMEMSIZE,
+				kmem, UB_HARD);
+		if (unlikely(err))
+			__uncharge_beancounter_locked(pub, UB_NUMFILE, 1);
+	}
+	spin_unlock_irqrestore(&pub->ub_lock, flags);
+	if (likely(!err)) {
+		charge_beancounter_notop(ub, UB_NUMFILE, 1);
+		charge_beancounter_notop(ub, UB_KMEMSIZE, kmem);
+		f->f_ub = get_beancounter(ub);	/* this path takes its own reference */
+	}
+	return err;
+}
+
+void ub_file_uncharge(struct file *f)
+{
+	struct user_beancounter *ub, *pub;
+	struct task_beancounter *task_bc;
+	unsigned long nr;
+	/* Return one file: to the current task's reserve if f belongs to its ub, else straight to the ub. */
+	ub = f->f_ub;
+	task_bc = &current->task_bc;
+	if (likely(ub == task_bc->task_ub)) {
+		task_bc->file_precharged++;
+		pub = top_beancounter(ub);
+		if (ub_barrier_farnr(pub, UB_NUMFILE) &&
+				ub_barrier_farsz(pub, UB_KMEMSIZE))
+			return;	/* NOTE(review): presumably "usage is far from the barriers" -- confirm */
+		if (task_bc->file_precharged < (1UL << task_bc->file_quant))
+			return;
+		nr = task_bc->file_precharged	/* a full batch is held: trim the reserve to half a batch */
+			- (1UL << (task_bc->file_quant - 1));
+		task_bc->file_precharged -= nr;
+		__put_beancounter_batch(ub, nr);
+		uncharge_beancounter(ub, UB_NUMFILE, nr);
+		uncharge_beancounter(ub, UB_KMEMSIZE, ub_file_kmemsize(nr));
+	} else {
+		uncharge_beancounter(ub, UB_NUMFILE, 1);
+		uncharge_beancounter(ub, UB_KMEMSIZE, ub_file_kmemsize(1));
+		put_beancounter(ub);
+	}
+}
+
+int ub_flock_charge(struct file_lock *fl, int hard)
+{
+	/* No get_beancounter() needed: the slab layer already holds the reference. */
+	struct user_beancounter *owner = slab_ub(fl);
+	int rc;
+
+	if (!owner)
+		return 0;
+	rc = charge_beancounter(owner, UB_NUMFLOCK, 1,
+			hard ? UB_HARD : UB_SOFT);
+	if (rc)
+		return rc;
+	fl->fl_charged = 1;	/* checked again by ub_flock_uncharge() */
+	return 0;
+}
+
+/* Return the UB_NUMFLOCK unit taken by ub_flock_charge(), if any. */
+void ub_flock_uncharge(struct file_lock *fl)
+{
+	/* The beancounter reference itself will be put in slab. */
+	struct user_beancounter *owner = slab_ub(fl);
+
+	/* Only locks marked as charged have something to give back. */
+	if (owner != NULL && fl->fl_charged) {
+		uncharge_beancounter(owner, UB_NUMFLOCK, 1);
+		fl->fl_charged = 0;
+	}
+}
+
+/*
+ * Signal handling
+ */
+
+static int do_ub_siginfo_charge(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+	/* Charge 'size' bytes of kmemsize and one siginfo to this one beancounter: both or neither. */
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (__charge_beancounter_locked(ub, UB_KMEMSIZE, size, UB_HARD))
+		goto out_kmem;
+
+	if (__charge_beancounter_locked(ub, UB_NUMSIGINFO, 1, UB_HARD))
+		goto out_num;
+
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return 0;
+
+out_num:
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);	/* roll back the kmemsize half */
+out_kmem:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return -ENOMEM;
+}
+
+static void do_ub_siginfo_uncharge(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+	/* Exact inverse of do_ub_siginfo_charge() for this one beancounter. */
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
+	__uncharge_beancounter_locked(ub, UB_NUMSIGINFO, 1);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+int ub_siginfo_charge(struct sigqueue *sq, struct user_beancounter *ub)
+{
+	unsigned long size;
+	struct user_beancounter *p, *q;
+	/* Charge sq to 'ub' and to each of its ancestors; on failure undo the levels already charged. */
+	size = CHARGE_SIZE(kmem_obj_memusage(sq));
+	for (p = ub; p != NULL; p = p->parent) {
+		if (do_ub_siginfo_charge(p, size))
+			goto unroll;
+	}
+
+	sq->sig_ub = get_beancounter(ub);
+	return 0;
+
+unroll:
+	for (q = ub; q != p; q = q->parent)	/* 'p' is the level that refused the charge */
+		do_ub_siginfo_uncharge(q, size);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(ub_siginfo_charge);
+
+void ub_siginfo_uncharge(struct sigqueue *sq)
+{
+	struct user_beancounter *owner = sq->sig_ub, *level;
+	unsigned long size;
+
+	sq->sig_ub = NULL;
+	size = CHARGE_SIZE(kmem_obj_memusage(sq));
+	/* Give the charge back at every level, from the owner up through its parents. */
+	for (level = owner; level != NULL; level = level->parent)
+		do_ub_siginfo_uncharge(level, size);
+	put_beancounter(owner);
+}
+
+/*
+ * PTYs
+ */
+
+int ub_pty_charge(struct tty_struct *tty)
+{
+	struct user_beancounter *owner = slab_ub(tty);
+	int err;
+
+	/* Only an accounted, not yet charged pty master takes a UB_NUMPTY unit. */
+	if (!owner || tty->driver->subtype != PTY_TYPE_MASTER)
+		return 0;
+	if (test_bit(TTY_CHARGED, &tty->flags))
+		return 0;
+	err = charge_beancounter(owner, UB_NUMPTY, 1, UB_HARD);
+	if (!err)
+		set_bit(TTY_CHARGED, &tty->flags);
+	return err;
+}
+
+void ub_pty_uncharge(struct tty_struct *tty)
+{
+	struct user_beancounter *owner = slab_ub(tty);
+	if (!owner || tty->driver->subtype != PTY_TYPE_MASTER)
+		return;
+	/* Nothing to give back unless ub_pty_charge() marked the tty. */
+	if (!test_bit(TTY_CHARGED, &tty->flags))
+		return;
+	uncharge_beancounter(owner, UB_NUMPTY, 1);
+	clear_bit(TTY_CHARGED, &tty->flags);
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/ub_net.c linux-2.6.16.46-0.12-027test011/kernel/ub/ub_net.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/ub_net.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/ub_net.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,1146 @@
+/*
+ *  linux/kernel/ub/ub_net.c
+ *
+ *  Copyright (C) 1998-2004  Andrey V. Savochkin <saw@saw.sw.com.sg>
+ *  Copyright (C) 2005 SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * TODO:
+ *   - sizeof(struct inode) charge
+ *   = tcp_mem_schedule() feedback based on ub limits
+ *   + measures so that one socket won't exhaust all send buffers,
+ *     see bug in bugzilla
+ *   = sk->socket check for NULL in snd_wakeups
+ *     (tcp_write_space checks for NULL itself)
+ *   + in tcp_close(), orphaned socket abortion should be based on ubc
+ *     resources (same in tcp_out_of_resources)
+ *     Beancounter should also have separate orphaned socket counter...
+ *   + for rcv, in-order segment should be accepted
+ *     if only barrier is exceeded
+ *   = tcp_rmem_schedule() feedback based on ub limits
+ *   - repair forward_alloc mechanism for receive buffers
+ *     Its idea is that some buffer space is pre-charged so that receive fast
+ *     path doesn't need to take spinlocks and do other heavy stuff
+ *   + tcp_prune_queue actions based on ub limits
+ *   + window adjustments depending on available buffers for receive
+ *   - window adjustments depending on available buffers for send
+ *   + race around usewreserv
+ *   + avoid allocating new page for each tiny-gram, see letter from ANK
+ *   + rename ub_sock_lock
+ *   + sk->sleep wait queue probably can be used for all wakeups, and
+ *     sk->ub_wait is unnecessary
+ *   + for UNIX sockets, the current algorithm will lead to
+ *     UB_UNIX_MINBUF-sized messages only for non-blocking case
+ *   - charge for af_packet sockets
+ *   + all datagram sockets should be charged to NUMUNIXSOCK
+ *   - we do not charge for skb copies and clones staying in device queues
+ *   + live-lock if number of sockets is big and buffer limits are small
+ *     [diff-ubc-dbllim3]
+ *   - check that multiple readers/writers on the same socket won't cause fatal
+ *     consequences
+ *   - check allocation/charge orders
+ *   + There is potential problem with callback_lock.  In *snd_wakeup we take
+ *     beancounter first, in sock_def_error_report - callback_lock first.
+ *     then beancounter.  This is not a problem if callback_lock taken
+ *     readonly, but anyway...
+ *   - SKB_CHARGE_SIZE doesn't include the space wasted by slab allocator
+ * General kernel problems:
+ *   - in tcp_sendmsg(), if allocation fails, non-blocking sockets with ASYNC
+ *     notification won't get signals
+ *   - datagram_poll looks racy
+ *
+ */
+
+#include <linux/net.h>
+#include <linux/slab.h>
+#include <linux/kmem_cache.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+#include <linux/socket.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#include <net/sock.h>
+#include <net/tcp.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_net.h>
+#include <ub/ub_debug.h>
+
+/* for some reason it is not used currently */
+#define UB_SOCK_MAINTAIN_WMEMPRESSURE	0
+
+
+/* Skb truesize definition. Bad place. Den */
+
+static inline int skb_chargesize_head(struct sk_buff *skb)
+{
+	return skb_charge_size(skb->end - skb->head +	/* bytes between head and end ... */
+				sizeof(struct skb_shared_info));	/* ... plus the shared info struct */
+}
+
+int skb_charge_fullsize(struct sk_buff *skb)
+{
+	struct sk_buff *frag = skb_shinfo(skb)->frag_list;
+	int total;
+
+	/* Head buffer plus one full page per entry counted in nr_frags. */
+	total = skb_chargesize_head(skb) +
+		PAGE_SIZE * skb_shinfo(skb)->nr_frags;
+
+	/* Each skb chained on frag_list is charged recursively, in order. */
+	while (frag != NULL) {
+		total += skb_charge_fullsize(frag);
+		frag = frag->next;
+	}
+	return total;
+}
+EXPORT_SYMBOL(skb_charge_fullsize);
+
+static int ub_sock_makewreserv_locked(struct sock *sk,
+		int bufid, unsigned long size);
+
+int __ub_too_many_orphans(struct sock *sk, int count)
+{
+	struct user_beancounter *ub;
+
+	/* Sockets without a beancounter are never limited here. */
+	if (!sock_has_ubc(sk))
+		return 0;
+	ub = top_beancounter(sock_bc(sk)->ub);
+	/* Too many once count reaches a quarter of the numtcpsock barrier. */
+	return count >= ub->ub_parms[UB_NUMTCPSOCK].barrier >> 2;
+}
+
+/*
+ * Queueing
+ */
+
+static void ub_sock_snd_wakeup(struct user_beancounter *ub)
+{
+	struct list_head *p;
+	struct sock *sk;
+	struct sock_beancounter *skbc;
+	struct socket *sock;
+	unsigned long added;
+	/* Entered and left with ub->ub_lock held; the lock is dropped around each write_space callback. */
+	while (!list_empty(&ub->ub_other_sk_list)) {
+		p = ub->ub_other_sk_list.next;
+		skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
+		sk = skbc_sock(skbc);
+
+		added = 0;
+		sock = sk->sk_socket;
+		if (sock == NULL) {
+			/* sk being destroyed */
+			list_del_init(&skbc->ub_sock_list);
+			continue;
+		}
+
+		ub_debug(UBD_NET_SLEEP,
+				"Checking queue, waiting %lu, reserv %lu\n",
+				skbc->ub_waitspc, skbc->poll_reserv);
+		added = -skbc->poll_reserv;	/* 'added' ends up as the growth of poll_reserv */
+		if (ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF,
+					skbc->ub_waitspc))
+			break;	/* no room for the first waiter: stop, it stays queued */
+		added += skbc->poll_reserv;
+
+		list_del_init(&skbc->ub_sock_list);
+
+		/*
+		 * See comments in ub_tcp_snd_wakeup.
+		 * Locking note: both unix_write_space and
+		 * sock_def_write_space take callback_lock themselves.
+		 * We take it here just to be on the safe side and to
+		 * act the same way as ub_tcp_snd_wakeup does.
+		 */
+		sock_hold(sk);	/* keep sk alive while ub_lock is not held */
+		read_lock(&sk->sk_callback_lock);
+		spin_unlock(&ub->ub_lock);
+
+		sk->sk_write_space(sk);
+		read_unlock(&sk->sk_callback_lock);
+
+		if (skbc->ub != ub && added)	/* socket's own ub differs from 'ub': charge the growth there too */
+			charge_beancounter_notop(skbc->ub,
+				       	UB_OTHERSOCKBUF, added);
+		sock_put(sk);
+
+		spin_lock(&ub->ub_lock);
+	}
+}
+
+static void ub_tcp_snd_wakeup(struct user_beancounter *ub)
+{
+	struct list_head *p;
+	struct sock *sk;
+	struct sock_beancounter *skbc;
+	struct socket *sock;
+	unsigned long added;
+	/* Entered and left with ub->ub_lock held; the lock is dropped around each write_space callback. */
+	while (!list_empty(&ub->ub_tcp_sk_list)) {
+		p = ub->ub_tcp_sk_list.next;
+		skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
+		sk = skbc_sock(skbc);
+
+		added = 0;
+		sock = sk->sk_socket;
+		if (sock == NULL) {
+			/* sk being destroyed */
+			list_del_init(&skbc->ub_sock_list);
+			continue;
+		}
+
+		ub_debug(UBD_NET_SLEEP,
+				"Checking queue, waiting %lu, reserv %lu\n",
+				skbc->ub_waitspc, skbc->poll_reserv);
+		added = -skbc->poll_reserv;	/* 'added' ends up as the growth of poll_reserv */
+		if (ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF,
+					skbc->ub_waitspc))
+			break;	/* no room for the first waiter: stop, it stays queued */
+		added += skbc->poll_reserv;
+
+		list_del_init(&skbc->ub_sock_list);
+
+		/*
+		 * Send async notifications and wake up.
+		 * Locking note: we get callback_lock here because
+		 * tcp_write_space is over-optimistic about calling context
+		 * (socket lock is presumed).  So we get the lock here although
+		 * it belongs to the callback.
+		 */
+		sock_hold(sk);	/* keep sk alive while ub_lock is not held */
+		read_lock(&sk->sk_callback_lock);
+		spin_unlock(&ub->ub_lock);
+
+		sk->sk_write_space(sk);
+		read_unlock(&sk->sk_callback_lock);
+
+		if (skbc->ub != ub && added)	/* socket's own ub differs from 'ub': charge the growth there too */
+			charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, added);
+		sock_put(sk);
+
+		spin_lock(&ub->ub_lock);
+	}
+}
+
+void ub_sock_snd_queue_add(struct sock *sk, int res, unsigned long size)
+{
+	unsigned long flags;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long added_reserv;
+	/* Try once more to reserve 'size'; if that fails, queue sk on the wait list that matches 'res'. */
+	if (!sock_has_ubc(sk))
+		return;
+
+	skbc = sock_bc(sk);
+	ub = top_beancounter(skbc->ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub_debug(UBD_NET_SLEEP, "attempt to charge for %lu\n", size);
+	added_reserv = -skbc->poll_reserv;	/* becomes the growth of poll_reserv on success */
+	if (!ub_sock_makewreserv_locked(sk, res, size)) {
+		/*
+		 * It looks a bit hackish, but it is compatible with both
+		 * wait_for_xx_ubspace and poll.
+		 * This __set_current_state is equivalent to a wakeup event
+		 * right after spin_unlock_irqrestore.
+		 */
+		__set_current_state(TASK_RUNNING);
+		added_reserv += skbc->poll_reserv;
+		spin_unlock_irqrestore(&ub->ub_lock, flags);
+		if (added_reserv)
+			charge_beancounter_notop(skbc->ub, res, added_reserv);
+		return;
+	}
+
+	ub_debug(UBD_NET_SLEEP, "Adding sk to queue\n");
+	skbc->ub_waitspc = size;	/* amount the wakeup side will try to reserve */
+	if (!list_empty(&skbc->ub_sock_list)) {	/* already queued: only the wanted size is refreshed */
+		ub_debug(UBD_NET_SOCKET,
+				"re-adding socket to beancounter %p.\n", ub);
+		goto out;
+	}
+
+	switch (res) {
+		case UB_TCPSNDBUF:
+			list_add_tail(&skbc->ub_sock_list,
+					&ub->ub_tcp_sk_list);
+			break;
+		case UB_OTHERSOCKBUF:
+			list_add_tail(&skbc->ub_sock_list,
+					&ub->ub_other_sk_list);
+			break;
+		default:
+			BUG();
+	}
+out:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+EXPORT_SYMBOL(ub_sock_snd_queue_add);
+
+long ub_sock_wait_for_space(struct sock *sk, long timeo, unsigned long size)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	/* Sleep until 'size' of UB_OTHERSOCKBUF can be reserved, or a signal, shutdown or socket error occurs. */
+	add_wait_queue(sk->sk_sleep, &wait);
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size))
+			break;	/* reservation made */
+
+		if (sk->sk_shutdown & SEND_SHUTDOWN)
+			break;
+		if (sk->sk_err)
+			break;
+		ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, size);
+		timeo = schedule_timeout(timeo);	/* 'timeo' is replaced by what schedule_timeout() returns */
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return timeo;
+}
+
+void ub_sock_sndqueuedel(struct sock *sk)
+{
+	struct user_beancounter *top;
+	unsigned long irqflags;
+
+	if (!sock_has_ubc(sk))
+		return;
+	/*
+	 * The removal is done under the top-level lock because it races
+	 * with the write_space callback of another socket.
+	 */
+	top = top_beancounter(sock_bc(sk)->ub);
+	spin_lock_irqsave(&top->ub_lock, irqflags);
+	list_del_init(&sock_bc(sk)->ub_sock_list);
+	spin_unlock_irqrestore(&top->ub_lock, irqflags);
+}
+
+/*
+ * Helpers
+ */
+
+static inline void __ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
+		       unsigned long size, int resource)
+{
+	skb_bc(skb)->ub = sock_bc(sk)->ub;	/* pointer is copied; no reference is taken here */
+	skb_bc(skb)->charged = size;
+	skb_bc(skb)->resource = resource;
+}
+
+void ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
+		       unsigned long size, int resource)
+{
+	if (!sock_has_ubc(sk))
+		return;
+
+	/* An accounted socket without a beancounter is treated as fatal. */
+	BUG_ON(sock_bc(sk)->ub == NULL);
+
+	__ub_skb_set_charge(skb, sk, size, resource);
+
+	/* Ugly: an skb in the sk write queue can live without a ref to sk. */
+	if (!skb->sk)
+		skb->sk = sk;
+}
+
+EXPORT_SYMBOL(ub_skb_set_charge);
+/* Clear the charge record (beancounter, size, resource) kept in @skb. */
+static inline void ub_skb_set_uncharge(struct sk_buff *skb)
+{
+	skb_bc(skb)->ub = NULL;
+	skb_bc(skb)->charged = 0;
+	skb_bc(skb)->resource = 0;
+}
+/* Recompute ub_rmem_thres: TCPRCVBUF barrier / (TCP sockets held + 1). */
+static void ub_update_rmem_thres(struct sock_beancounter *skub)
+{
+	struct user_beancounter *top;
+
+	if (skub == NULL || skub->ub == NULL)
+		return;
+	top = top_beancounter(skub->ub);
+	top->ub_rmem_thres = top->ub_parms[UB_TCPRCVBUF].barrier /
+		(top->ub_parms[UB_NUMTCPSOCK].held + 1);
+}
+/* Lower @sk's ub_wcharged by @chargesize, never letting it go below zero. */
+static inline void ub_sock_wcharge_dec(struct sock *sk,
+		unsigned long chargesize)
+{
+	/* PF_NETLINK sockets are left alone as the skb is queued to the
+	 * kernel end of socket while changed to the user one.  Den */
+	if (likely(!sock_bc(sk)->ub_wcharged) || sk->sk_family == PF_NETLINK)
+		return;
+
+	if (sock_bc(sk)->ub_wcharged <= chargesize)
+		sock_bc(sk)->ub_wcharged = 0;
+	else
+		sock_bc(sk)->ub_wcharged -= chargesize;
+}
+
+/*
+ * Charge socket number
+ */
+/* Zero the sock_beancounter that sock_bc() returns for @sk. */
+static inline void sk_alloc_beancounter(struct sock *sk)
+{
+	struct sock_beancounter *skbc;
+
+	skbc = sock_bc(sk);
+	memset(skbc, 0, sizeof(struct sock_beancounter));
+}
+/* Counterpart of sk_alloc_beancounter(); there is nothing to release. */
+static inline void sk_free_beancounter(struct sock *sk)
+{
+}
+/* Charge one @res socket to the get_exec_ub() beancounter; 0 or -ENOMEM. */
+static int __sock_charge(struct sock *sk, int res)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *cub, *ub;
+	unsigned long added_reserv, added_forw;
+	unsigned long flags;
+
+	cub = get_exec_ub();
+	if (unlikely(cub == NULL))
+		return 0;
+
+	sk_alloc_beancounter(sk);
+	skbc = sock_bc(sk);
+	INIT_LIST_HEAD(&skbc->ub_sock_list);
+	/* the socket count is checked (UB_HARD) on the top-level beancounter */
+	ub = top_beancounter(cub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (unlikely(__charge_beancounter_locked(ub, res, 1, UB_HARD) < 0))
+		goto out_limit;
+	/* TCP only: try to pre-charge a send reserve and receive forward space */
+	added_reserv = 0;
+	added_forw = 0;
+	if (res == UB_NUMTCPSOCK) {
+		added_reserv = skb_charge_size(MAX_TCP_HEADER +
+				1500 - sizeof(struct iphdr) -
+					sizeof(struct tcphdr));
+		added_reserv *= 4;
+		ub->ub_parms[UB_TCPSNDBUF].held += added_reserv;
+		if (!ub_barrier_farsz(ub, UB_TCPSNDBUF)) {
+			ub->ub_parms[UB_TCPSNDBUF].held -= added_reserv;
+			added_reserv = 0;
+		}
+		skbc->poll_reserv = added_reserv;
+
+		added_forw = SK_STREAM_MEM_QUANTUM * 4;
+		ub->ub_parms[UB_TCPRCVBUF].held += added_forw;
+		if (!ub_barrier_farsz(ub, UB_TCPRCVBUF)) {
+			ub->ub_parms[UB_TCPRCVBUF].held -= added_forw;
+			added_forw = 0;
+		}
+		skbc->forw_space = added_forw;
+	}
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	/* repeat the accepted charges through charge_beancounter_notop() */
+	charge_beancounter_notop(cub, res, 1);
+	if (added_reserv)
+		charge_beancounter_notop(cub, UB_TCPSNDBUF, added_reserv);
+	if (added_forw)
+		charge_beancounter_notop(cub, UB_TCPRCVBUF, added_forw);
+	/* the socket holds a reference to the beancounter it is charged to */
+	skbc->ub = get_beancounter(cub);
+	return 0;
+
+out_limit:
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	sk_free_beancounter(sk);
+	return -ENOMEM;
+}
+/* Charge @sk as a TCP socket (UB_NUMTCPSOCK), then refresh ub_rmem_thres. */
+int ub_tcp_sock_charge(struct sock *sk)
+{
+	int ret;
+
+	ret = __sock_charge(sk, UB_NUMTCPSOCK);
+	ub_update_rmem_thres(sock_bc(sk));
+
+	return ret;
+}
+/* Charge @sk against UB_NUMOTHERSOCK; returns the __sock_charge() result. */
+int ub_other_sock_charge(struct sock *sk)
+{
+	return __sock_charge(sk, UB_NUMOTHERSOCK);
+}
+
+EXPORT_SYMBOL(ub_other_sock_charge);
+/* Charge @sk as TCP or as "other", as IS_TCP_SOCK(@family, @type) decides. */
+int ub_sock_charge(struct sock *sk, int family, int type)
+{
+	return (IS_TCP_SOCK(family, type) ?
+			ub_tcp_sock_charge(sk) : ub_other_sock_charge(sk));
+}
+EXPORT_SYMBOL(ub_sock_charge);
+
+/*
+ * Uncharge socket number
+ */
+/* Release @sk's reserves, its socket count and its beancounter reference. */
+void ub_sock_uncharge(struct sock *sk)
+{
+	int is_tcp_sock;
+	unsigned long flags;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long reserv, forw;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	is_tcp_sock = IS_TCP_SOCK(sk->sk_family, sk->sk_type);
+	skbc = sock_bc(sk);
+	ub_debug(UBD_NET_SOCKET, "Calling ub_sock_uncharge on %p\n", sk);
+
+	ub = top_beancounter(skbc->ub);
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (!list_empty(&skbc->ub_sock_list)) {
+		ub_debug(UBD_NET_SOCKET,
+			 "ub_sock_uncharge: removing from ub(%p) queue.\n",
+			 skbc);
+		list_del_init(&skbc->ub_sock_list);
+	}
+	/* give back the send reserve, the forward space and the socket itself */
+	reserv = skbc->poll_reserv;
+	forw = skbc->forw_space;
+	__uncharge_beancounter_locked(ub,
+			(is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
+			reserv);
+	if (forw)
+		__uncharge_beancounter_locked(ub,
+				(is_tcp_sock ? UB_TCPRCVBUF : UB_DGRAMRCVBUF),
+				forw);
+	__uncharge_beancounter_locked(ub,
+			(is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
+	/* ub_wcharged left over after the reserve is subtracted is reported */
+	ub_sock_wcharge_dec(sk, reserv);
+	if (unlikely(skbc->ub_wcharged))
+		printk(KERN_WARNING
+		       "ub_sock_uncharge: wch=%lu for ub %p (%d).\n",
+		       skbc->ub_wcharged, skbc->ub, skbc->ub->ub_uid);
+	skbc->poll_reserv = 0;
+	skbc->forw_space = 0;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	/* the same amounts again through uncharge_beancounter_notop() */
+	uncharge_beancounter_notop(skbc->ub,
+			(is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
+			reserv);
+	if (forw)
+		uncharge_beancounter_notop(skbc->ub,
+				(is_tcp_sock ? UB_TCPRCVBUF : UB_DGRAMRCVBUF),
+				forw);
+	uncharge_beancounter_notop(skbc->ub,
+			(is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
+	/* drop the reference taken by get_beancounter() in __sock_charge() */
+	put_beancounter(skbc->ub);
+	sk_free_beancounter(sk);
+}
+
+/*
+ * Special case for netlink_dump - (un)charges precalculated size
+ */
+/* Charge @skb's full size to UB_DGRAMRCVBUF of @sk's beancounter (UB_HARD). */
+int ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk)
+{
+	int ret;
+	unsigned long chargesize;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+	/* a negative charge_beancounter() result is handed back unchanged */
+	chargesize = skb_charge_fullsize(skb);
+	ret = charge_beancounter(sock_bc(sk)->ub,
+			UB_DGRAMRCVBUF, chargesize, UB_HARD);
+	if (ret < 0)
+		return ret;
+	ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
+	return ret;
+}
+
+/*
+ * Poll reserve accounting
+ *
+ * This is the core of socket buffer management (along with queueing/wakeup
+ * functions.  The rest of buffer accounting either call these functions, or
+ * repeat parts of their logic for some simpler cases.
+ */
+/* Raise @sk's poll_reserv to @size for @bufid; called under the top ub_lock. */
+static int ub_sock_makewreserv_locked(struct sock *sk,
+		int bufid, unsigned long size)
+{
+	unsigned long wcharge_added;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+
+	skbc = sock_bc(sk);
+	if (skbc->poll_reserv >= size) /* no work to be done */
+		goto out;
+	/* account the missing part tentatively; "unroll" takes it back */
+	ub = top_beancounter(skbc->ub);
+	ub->ub_parms[bufid].held += size - skbc->poll_reserv;
+
+	wcharge_added = 0;
+	/*
+	 * Logic:
+	 *  1) when used memory hits barrier, we set wmem_pressure;
+	 *     wmem_pressure is reset under barrier/2;
+	 *     between barrier/2 and barrier we limit per-socket buffer growth;
+	 *  2) each socket is guaranteed to get (limit-barrier)/maxsockets
+	 *     calculated on the base of memory eaten after the barrier is hit
+	 */
+	skbc = sock_bc(sk);	/* redundant: same value as assigned above */
+#if UB_SOCK_MAINTAIN_WMEMPRESSURE
+	if (!ub_hfbarrier_hit(ub, bufid)) {
+		if (ub->ub_wmem_pressure)
+			ub_debug(UBD_NET_SEND, "makewres: pressure -> 0 "
+				"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+				sk, size, skbc->poll_reserv,
+				ub->ub_parms[bufid].held,
+				skbc->ub_wcharged, sk->sk_sndbuf);
+		ub->ub_wmem_pressure = 0;
+	}
+#endif
+	if (ub_barrier_hit(ub, bufid)) {
+#if UB_SOCK_MAINTAIN_WMEMPRESSURE
+		if (!ub->ub_wmem_pressure)
+			ub_debug(UBD_NET_SEND, "makewres: pressure -> 1 "
+				"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+				sk, size, skbc->poll_reserv,
+				ub->ub_parms[bufid].held,
+				skbc->ub_wcharged, sk->sk_sndbuf);
+		ub->ub_wmem_pressure = 1;
+#endif
+		if (sk->sk_family == PF_NETLINK)
+			goto unroll;
+		wcharge_added = size - skbc->poll_reserv;
+		skbc->ub_wcharged += wcharge_added;
+		if (skbc->ub_wcharged * ub->ub_parms[bid2sid(bufid)].limit +
+				ub->ub_parms[bufid].barrier >
+					ub->ub_parms[bufid].limit)
+			goto unroll_wch;
+	}
+	if (ub->ub_parms[bufid].held > ub->ub_parms[bufid].limit)
+		goto unroll;
+	/* accepted: the reserve now is exactly @size */
+	ub_adjust_maxheld(ub, bufid);
+	skbc->poll_reserv = size;
+out:
+	return 0;
+	/* failure: undo the tentative accounting, poll_reserv stays as it was */
+unroll_wch:
+	skbc->ub_wcharged -= wcharge_added;
+unroll:
+	ub_debug(UBD_NET_SEND,
+			"makewres: deny "
+			"sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
+			sk, size, skbc->poll_reserv, ub->ub_parms[bufid].held,
+			skbc->ub_wcharged, sk->sk_sndbuf);
+	ub->ub_parms[bufid].failcnt++;
+	ub->ub_parms[bufid].held -= size - skbc->poll_reserv;
+	return -ENOMEM;
+}
+/* Try to bring @sk's poll_reserv up to @size bytes of @bufid; 0 or -ENOMEM. */
+int ub_sock_make_wreserv(struct sock *sk, int bufid, unsigned long size)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long flags;
+	unsigned long added_reserv;
+	int err;
+
+	skbc = sock_bc(sk);
+
+	/*
+	 * This function provides that there is sufficient reserve upon return
+	 * only if sk has only one user.  We can check poll_reserv without
+	 * serialization and avoid locking if the reserve already exists.
+	 */
+	if (unlikely(!sock_has_ubc(sk)) || likely(skbc->poll_reserv >= size))
+		return 0;
+
+	ub = top_beancounter(skbc->ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	added_reserv = -skbc->poll_reserv;
+	err = ub_sock_makewreserv_locked(sk, bufid, size);
+	added_reserv += skbc->poll_reserv;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	/* added_reserv is the growth of poll_reserv (0 when it was refused) */
+	if (added_reserv)
+		charge_beancounter_notop(skbc->ub, bufid, added_reserv);
+
+	return err;
+}
+
+EXPORT_SYMBOL(ub_sock_make_wreserv);
+/* Take @size bytes of @bufid from @sk's reserve; 0 on success or -ENOMEM. */
+int ub_sock_get_wreserv(struct sock *sk, int bufid, unsigned long size)
+{
+	struct sock_beancounter *bc;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	/* top the reserve up first; the outcome is judged by poll_reserv */
+	ub_sock_make_wreserv(sk, bufid, size);
+	bc = sock_bc(sk);
+	if (unlikely(bc->poll_reserv < size))
+		return -ENOMEM;
+
+	bc->poll_reserv -= size;
+	return 0;
+}
+
+EXPORT_SYMBOL(ub_sock_get_wreserv);
+/* Add @size to @sk's reserve and uncharge whatever then exceeds @ressize. */
+static void ub_sock_do_ret_wreserv(struct sock *sk, int bufid,
+		unsigned long size, unsigned long ressize)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long extra;
+	unsigned long flags;
+
+	skbc = sock_bc(sk);
+	ub = top_beancounter(skbc->ub);
+
+	extra = 0;
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	skbc->poll_reserv += size;
+	if (skbc->poll_reserv > ressize) {
+		extra = skbc->poll_reserv - ressize;
+		ub_sock_wcharge_dec(sk, extra);
+		skbc->poll_reserv = ressize;
+		/* the surplus is uncharged; then the matching wakeup helper runs */
+		__uncharge_beancounter_locked(ub, bufid, extra);
+		if (bufid == UB_TCPSNDBUF)
+			ub_tcp_snd_wakeup(ub);
+		else
+			ub_sock_snd_wakeup(ub);
+	}
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	if (extra)
+		uncharge_beancounter_notop(skbc->ub, bufid, extra);
+}
+/* Return @size bytes of @bufid to @sk, keeping at most @ressize in reserve. */
+void ub_sock_ret_wreserv(struct sock *sk, int bufid,
+		unsigned long size, unsigned long ressize)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	skbc = sock_bc(sk);
+	ub = top_beancounter(skbc->ub);
+	/* check if the reserve can be kept (no lock, @ressize not applied) */
+	if (ub_barrier_farsz(ub, bufid)) {
+		skbc->poll_reserv += size;
+		return;
+	}
+	ub_sock_do_ret_wreserv(sk, bufid, size, ressize);
+}
+
+/*
+ * UB_DGRAMRCVBUF
+ */
+/* Charge @skb's full size to UB_DGRAMRCVBUF for @sk; 0 or -ENOMEM. */
+int ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb)
+{
+	unsigned long chargesize;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	chargesize = skb_charge_fullsize(skb);
+	if (charge_beancounter(sock_bc(sk)->ub, UB_DGRAMRCVBUF,
+				 chargesize, UB_HARD))
+		return -ENOMEM;
+
+	ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
+	return 0;
+}
+
+EXPORT_SYMBOL(ub_sockrcvbuf_charge);
+/* Uncharge the UB_DGRAMRCVBUF amount recorded in @skb and clear the record. */
+static void ub_sockrcvbuf_uncharge(struct sk_buff *skb)
+{
+	uncharge_beancounter(skb_bc(skb)->ub, UB_DGRAMRCVBUF,
+			     skb_bc(skb)->charged);
+	ub_skb_set_uncharge(skb);
+}
+EXPORT_SYMBOL(ub_tcprcvbuf_charge_forced);
+
+/*
+ * UB_TCPRCVBUF
+ */
+/* Charge @skb to UB_TCPRCVBUF for @sk with severity @strict; 0 or -ENOMEM. */
+int ub_sock_tcp_chargerecv(struct sock *sk, struct sk_buff *skb,
+			    enum ub_severity strict)
+{
+	int retval;
+	unsigned long flags;
+	struct user_beancounter *ub;
+	struct sock_beancounter *skbc;
+	unsigned long chargesize;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+	skbc = sock_bc(sk);
+	/* fast path: consume already charged forward space, no locking */
+	chargesize = skb_charge_fullsize(skb);
+	if (likely(skbc->forw_space >= chargesize)) {
+		skbc->forw_space -= chargesize;
+		__ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
+		return 0;
+	}
+
+	/*
+	 * Memory pressure reactions:
+	 *  1) set UB_RMEM_KEEP (clearing UB_RMEM_EXPAND)
+	 *  2) set UB_RMEM_SHRINK and tcp_clamp_window()
+	 *     tcp_collapse_queues() if rmem_alloc > rcvbuf
+	 *  3) drop OFO, tcp_purge_ofo()
+	 *  4) drop all.
+	 * Currently, we do #2 and #3 at once (which means that current
+	 * collapsing of OFO queue in tcp_collapse_queues() is a waste of time,
+	 * for example...)
+	 * On memory pressure we jump from #0 to #3, and when the pressure
+	 * subsides, to #1.
+	 */
+	retval = 0;
+	ub = top_beancounter(sock_bc(sk)->ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_parms[UB_TCPRCVBUF].held += chargesize;
+	if (ub->ub_parms[UB_TCPRCVBUF].held >
+			ub->ub_parms[UB_TCPRCVBUF].barrier &&
+			strict != UB_FORCE)
+		goto excess;
+	ub_adjust_maxheld(ub, UB_TCPRCVBUF);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+out:
+	if (retval == 0) {
+		charge_beancounter_notop(sock_bc(sk)->ub, UB_TCPRCVBUF,
+				chargesize);
+		ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
+	}
+	return retval;
+	/* above the barrier and not UB_FORCE: decide whether to refuse */
+excess:
+	ub->ub_rmem_pressure = UB_RMEM_SHRINK;
+	if (strict == UB_HARD)
+		retval = -ENOMEM;
+	if (ub->ub_parms[UB_TCPRCVBUF].held > ub->ub_parms[UB_TCPRCVBUF].limit)
+		retval = -ENOMEM;
+	/*
+	 * We try to leave numsock*maxadvmss as a reserve for sockets not
+	 * queueing any data yet (if the difference between the barrier and the
+	 * limit is enough for this reserve).
+	 */
+	if (ub->ub_parms[UB_TCPRCVBUF].held +
+			ub->ub_parms[UB_NUMTCPSOCK].limit * ub->ub_maxadvmss
+			> ub->ub_parms[UB_TCPRCVBUF].limit &&
+			atomic_read(&sk->sk_rmem_alloc))
+		retval = -ENOMEM;
+	if (retval) {
+		ub->ub_parms[UB_TCPRCVBUF].held -= chargesize;
+		ub->ub_parms[UB_TCPRCVBUF].failcnt++;
+	}
+	ub_adjust_maxheld(ub, UB_TCPRCVBUF);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	goto out;
+}
+EXPORT_SYMBOL(ub_sock_tcp_chargerecv);
+/* Uncharge a UB_TCPRCVBUF skb and relax ub_rmem_pressure where possible. */
+static void ub_tcprcvbuf_uncharge(struct sk_buff *skb)
+{
+	unsigned long flags;
+	unsigned long held, bar;
+	struct user_beancounter *ub;
+
+	/* keep the bytes as the socket's forward space while that is allowed */
+	ub = top_beancounter(skb_bc(skb)->ub);
+	if (ub_barrier_farsz(ub, UB_TCPRCVBUF)) {
+		sock_bc(skb->sk)->forw_space += skb_bc(skb)->charged;
+		ub_skb_set_uncharge(skb);
+		return;
+	}
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (ub->ub_parms[UB_TCPRCVBUF].held < skb_bc(skb)->charged) {
+		printk(KERN_ERR "Uncharging %d for tcprcvbuf of %p with %lu\n",
+				skb_bc(skb)->charged,
+				ub, ub->ub_parms[UB_TCPRCVBUF].held);
+		/* clamp the amount so that held cannot underflow */
+		skb_bc(skb)->charged = ub->ub_parms[UB_TCPRCVBUF].held;
+	}
+	ub->ub_parms[UB_TCPRCVBUF].held -= skb_bc(skb)->charged;
+	held = ub->ub_parms[UB_TCPRCVBUF].held;
+	bar = ub->ub_parms[UB_TCPRCVBUF].barrier;
+	/* up to 3/4 of the barrier: EXPAND; up to the barrier: KEEP */
+	if (held <= bar - (bar >> 2))
+		ub->ub_rmem_pressure = UB_RMEM_EXPAND;
+	else if (held <= bar)
+		ub->ub_rmem_pressure = UB_RMEM_KEEP;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	uncharge_beancounter_notop(skb_bc(skb)->ub, UB_TCPRCVBUF,
+			skb_bc(skb)->charged);
+	ub_skb_set_uncharge(skb);
+}
+
+
+/*
+ * UB_OTHERSOCKBUF and UB_TCPSNDBUF
+ */
+/* Uncharge a UB_OTHERSOCKBUF skb, then let ub_sock_snd_wakeup() run. */
+static void ub_socksndbuf_uncharge(struct sk_buff *skb)
+{
+	unsigned long flags;
+	struct user_beancounter *ub, *cub;
+	unsigned long chargesize;
+
+	cub = skb_bc(skb)->ub;
+	ub = top_beancounter(cub);
+	chargesize = skb_bc(skb)->charged;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_OTHERSOCKBUF, chargesize);
+	if (skb->sk != NULL && sock_has_ubc(skb->sk))
+		ub_sock_wcharge_dec(skb->sk, chargesize);
+	ub_sock_snd_wakeup(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	uncharge_beancounter_notop(cub, UB_OTHERSOCKBUF, chargesize);
+	ub_skb_set_uncharge(skb);
+}
+/* Uncharge a UB_TCPSNDBUF skb through ub_sock_ret_wreserv().
+ * Expected to be called under socket lock. */
+static void ub_tcpsndbuf_uncharge(struct sk_buff *skb)
+{
+	/*
+	 * ub_sock_ret_wreserv call is abused here, we just want to uncharge
+	 * skb size.  However, to reduce duplication of the code doing
+	 * ub_hfbarrier_hit check, ub_wcharged reduction, and wakeup we call
+	 * a function that already does all of this.  2006/04/27  SAW
+	 */
+	ub_sock_ret_wreserv(skb->sk, UB_TCPSNDBUF, skb_bc(skb)->charged,
+			sock_bc(skb->sk)->poll_reserv);
+	ub_skb_set_uncharge(skb);
+}
+/* Uncharge @skb by its recorded resource; other values are left untouched. */
+void ub_skb_uncharge(struct sk_buff *skb)
+{
+	switch (skb_bc(skb)->resource) {
+		case UB_TCPSNDBUF:
+			ub_tcpsndbuf_uncharge(skb);
+			break;
+		case UB_TCPRCVBUF:
+			ub_tcprcvbuf_uncharge(skb);
+			break;
+		case UB_DGRAMRCVBUF:
+			ub_sockrcvbuf_uncharge(skb);
+			break;
+		case UB_OTHERSOCKBUF:
+			ub_socksndbuf_uncharge(skb);
+			break;
+	}
+}
+
+EXPORT_SYMBOL(ub_skb_uncharge);	/* due to skb_orphan()/conntracks */
+
+/*
+ * Other sock reserve management
+ */
+/* Reserve and at once take @size bytes of UB_OTHERSOCKBUF; 0 or -ENOMEM. */
+int ub_sock_getwres_other(struct sock *sk, unsigned long size)
+{
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long flags;
+	unsigned long added_reserv;
+	int err;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+
+	/*
+	 * Nothing except beancounter lock protects skbc->poll_reserv.
+	 * So, take the lock and do the job.
+	 * Dances with added_reserv repeat ub_sock_make_wreserv.
+	 */
+	skbc = sock_bc(sk);
+	ub = top_beancounter(skbc->ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	added_reserv = -skbc->poll_reserv;
+	err = ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF, size);
+	added_reserv += skbc->poll_reserv;
+	if (!err)
+		skbc->poll_reserv -= size;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	/* added_reserv was sampled before @size was taken out again */
+	if (added_reserv)
+		charge_beancounter_notop(skbc->ub, UB_OTHERSOCKBUF, added_reserv);
+
+	return err;
+}
+EXPORT_SYMBOL(ub_sock_getwres_other);
+/* Return @size bytes of UB_OTHERSOCKBUF, keeping at most @ressize reserved. */
+void ub_sock_retwres_other(struct sock *sk,
+		unsigned long size, unsigned long ressize)
+{
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	ub_sock_do_ret_wreserv(sk, UB_OTHERSOCKBUF, size, ressize);
+}
+EXPORT_SYMBOL(ub_tcpsndbuf_charge_forced);
+
+/*
+ * TCP send buffers accounting. Paged part
+ */
+/* Take PAGE_SIZE bytes of UB_TCPSNDBUF for @sk, forcing a charge if needed. */
+int ub_sock_tcp_chargepage(struct sock *sk)
+{
+	struct sock_beancounter *skbc;
+	unsigned long extra;
+	int err;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+	/* first try to get a whole page worth of reserve */
+	skbc = sock_bc(sk);
+	ub_sock_make_wreserv(sk, UB_TCPSNDBUF, PAGE_SIZE);
+	if (likely(skbc->poll_reserv >= PAGE_SIZE)) {
+		skbc->poll_reserv -= PAGE_SIZE;
+		return 0;
+	}
+
+	/*
+	 * Ok, full page is not available.
+	 * However, this function must succeed if poll previously indicated
+	 * that write is possible.  We better make a forced charge here
+	 * than reserve a whole page in poll.
+	 */
+	err = ub_sock_make_wreserv(sk, UB_TCPSNDBUF, SOCK_MIN_UBCSPACE);
+	if (unlikely(err < 0))
+		goto out;
+	if (skbc->poll_reserv < PAGE_SIZE) {
+		extra = PAGE_SIZE - skbc->poll_reserv;
+		err = charge_beancounter(skbc->ub, UB_TCPSNDBUF, extra,
+				UB_FORCE);
+		if (err < 0)
+			goto out;
+		skbc->poll_reserv += extra;
+	}
+	skbc->poll_reserv -= PAGE_SIZE;
+	return 0;
+
+out:
+	return err;
+}
+/* Account for a page detached from @sk: give it back or bill the last skb. */
+void ub_sock_tcp_detachpage(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+
+	/* The page is just detached from socket. The last skb in queue
+	   with paged part holds reference to it */
+	skb = skb_peek_tail(&sk->sk_write_queue);
+	if (skb == NULL) {
+		/* If the queue is empty - all data is sent and page is about
+		   to be freed */
+		ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, PAGE_SIZE,
+				sock_bc(sk)->poll_reserv);
+	} else {
+		/* Last skb is a good approximation for a last skb with
+		   paged part */
+		skb_bc(skb)->charged += PAGE_SIZE;
+	}
+}
+
+/*
+ * TCPSNDBUF charge functions below are called in the following cases:
+ *  - sending of SYN, SYN-ACK, FIN, the latter charge is forced by
+ *    some technical reasons in TCP code;
+ *  - fragmentation of TCP packets.
+ * These functions are allowed but not required to use poll_reserv.
+ * Originally, these functions didn't do that, since it didn't make
+ * any sense.  Now, since poll_reserv now has a function of general reserve,
+ * they use it.
+ */
+int ub_sock_tcp_chargesend(struct sock *sk, struct sk_buff *skb,
+			    enum ub_severity strict)
+{
+	int ret;
+	unsigned long chargesize;
+	struct sock_beancounter *skbc;
+	struct user_beancounter *ub;
+	unsigned long flags;
+
+	if (unlikely(!sock_has_ubc(sk)))
+		return 0;
+	/* fast path: the charge fits into the existing reserve, no locking */
+	skbc = sock_bc(sk);
+	chargesize = skb_charge_fullsize(skb);
+	if (likely(skbc->poll_reserv >= chargesize)) {
+		skbc->poll_reserv -= chargesize;
+		__ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
+		/* XXX hack, see ub_skb_set_charge */
+		skb->sk = sk;
+		return 0;
+	}
+	/* slow path: charge the top beancounter directly, honouring @strict */
+	ub = top_beancounter(skbc->ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ret = __charge_beancounter_locked(ub, UB_TCPSNDBUF,
+			chargesize, strict);
+	/*
+	 * Note: this check is not equivalent of the corresponding check
+	 * in makewreserv.  It's similar in spirit, but an equivalent check
+	 * would be too long and complicated here.
+	 */
+	if (!ret && ub_barrier_hit(ub, UB_TCPSNDBUF))
+		skbc->ub_wcharged += chargesize;
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	if (likely(!ret)) {
+		charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, chargesize);
+		ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(ub_sock_tcp_chargesend);
+/* Return @size bytes of UB_TCPSNDBUF that were charged for @sk. */
+void ub_sock_tcp_unchargesend(struct sock *sk, unsigned long size)
+{
+	if (unlikely(!sock_has_ubc(sk)))
+		return;
+	/* see ub_tcpsndbuf_uncharge */
+	ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, sock_bc(sk)->poll_reserv);
+}
+
+/*
+ * Initialization: nothing is set up here, success (0) is always reported
+ */
+int __init skbc_cache_init(void)
+{
+	return 0;
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/ub_oom.c linux-2.6.16.46-0.12-027test011/kernel/ub/ub_oom.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/ub_oom.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/ub_oom.c	2007-08-28 17:35:32.000000000 +0400
@@ -0,0 +1,200 @@
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/cpuset.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_oom.h>
+#include <ub/ub_hash.h>
+
+#define UB_OOM_TIMEOUT	(5 * HZ)	/* longest total sleep in ub_oom_lock() */
+
+int oom_generation;	/* incremented by ub_oom_task_dead() */
+int oom_kill_counter;	/* non-zero makes ub_oom_lock() wait; zeroed on task death */
+static DEFINE_SPINLOCK(oom_lock);	/* taken by ub_oom_lock(), ub_out_of_memory() */
+static DECLARE_WAIT_QUEUE_HEAD(oom_wq);	/* sleepers of ub_oom_lock() */
+/* Tell whether the current OOM round is over from @tsk's point of view. */
+static inline int ub_oom_completed(struct task_struct *tsk)
+{
+	/*
+	 * The round is over for us when we were OOM-killed ourselves (just
+	 * die), or when the global generation has moved on because some
+	 * task was successfully killed.
+	 */
+	return test_tsk_thread_flag(tsk, TIF_MEMDIE) ||
+		tsk->task_bc.oom_generation != oom_generation;
+}
+/* Reset ub_oom_noproc on every beancounter (walked under RCU). */
+static void ub_clear_oom(void)
+{
+	struct user_beancounter *ub;
+
+	rcu_read_lock();
+	for_each_beancounter(ub)
+		ub->ub_oom_noproc = 0;
+	rcu_read_unlock();
+}
+/* Serialize OOM kills.  Called with cpuset_lock held; returns 0 with oom_lock
+ * held when the caller may proceed, or -EINVAL with oom_lock released. */
+int ub_oom_lock(void)
+{
+	int timeout;
+	DEFINE_WAIT(oom_w);
+	struct task_struct *tsk;
+
+	tsk = current;
+
+	spin_lock(&oom_lock);
+	if (!oom_kill_counter)
+		goto out_do_oom;
+	/* oom_kill_counter is set: wait up to UB_OOM_TIMEOUT for completion */
+	timeout = UB_OOM_TIMEOUT;
+	while (1) {
+		if (ub_oom_completed(tsk)) {
+			spin_unlock(&oom_lock);
+			return -EINVAL;
+		}
+
+		if (timeout == 0)
+			break;
+		/* sleep with both locks dropped; ub_oom_task_dead() wakes oom_wq */
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+		add_wait_queue(&oom_wq, &oom_w);
+		spin_unlock(&oom_lock);
+		cpuset_unlock();
+
+		timeout = schedule_timeout(timeout);
+
+		cpuset_lock();
+		spin_lock(&oom_lock);
+		remove_wait_queue(&oom_wq, &oom_w);
+	}
+
+out_do_oom:
+	ub_clear_oom();
+	return 0;
+}
+/* Pages held (oomguarpages + kmem/socket bytes) above the oomguar barrier. */
+static inline long ub_current_overdraft(struct user_beancounter *ub)
+{
+	return ub->ub_parms[UB_OOMGUARPAGES].held +
+		((ub->ub_parms[UB_KMEMSIZE].held
+		  + ub->ub_parms[UB_TCPSNDBUF].held
+		  + ub->ub_parms[UB_TCPRCVBUF].held
+		  + ub->ub_parms[UB_OTHERSOCKBUF].held
+		  + ub->ub_parms[UB_DGRAMRCVBUF].held)
+		 >> PAGE_SHIFT) - ub->ub_parms[UB_OOMGUARPAGES].barrier;
+}
+/* Decide whether @tsk lies outside @ub: returns non-zero when it has no mm
+ * or when its mm's beancounter is neither @ub nor one of its descendants.
+ * A NULL @ub never skips anything. */
+int ub_oom_task_skip(struct user_beancounter *ub, struct task_struct *tsk)
+{
+	struct user_beancounter *walk;
+
+	if (!ub)
+		return 0;
+
+	task_lock(tsk);
+	/* climb from the mm's beancounter through its parents, looking for @ub */
+	walk = tsk->mm ? tsk->mm->mm_ub : NULL;
+	while (walk && walk != ub)
+		walk = walk->parent;
+	task_unlock(tsk);
+
+	return walk != ub;
+}
+/* Pick the parentless beancounter with the largest positive overdraft. */
+struct user_beancounter *ub_oom_select_worst(void)
+{
+	struct user_beancounter *ub, *walkp;
+	long ub_maxover;
+
+	ub_maxover = 0;
+	ub = NULL;
+
+	rcu_read_lock();
+	for_each_beancounter (walkp) {
+		long ub_overdraft;
+
+		if (walkp->parent != NULL)
+			continue;
+		if (walkp->ub_oom_noproc)
+			continue;
+
+		ub_overdraft = ub_current_overdraft(walkp);
+		if (ub_overdraft > ub_maxover && get_beancounter_rcu(walkp)) {
+			put_beancounter(ub);
+			ub = walkp;
+			ub_maxover = ub_overdraft;
+		}
+	}
+	/* mark the winner: it is passed over until ub_clear_oom() resets this */
+	if (ub)
+		ub->ub_oom_noproc = 1;
+	rcu_read_unlock();
+	/* NULL if none qualified; else the get_beancounter_rcu() ref is kept */
+	return ub;
+}
+/* Count an OOM kill in @ub's oomguarpages failcnt; show_mem() rate-limited. */
+void ub_oom_mm_killed(struct user_beancounter *ub)
+{
+	static struct ub_rate_info ri = { 5, 60*HZ };
+
+	/* increment is serialized with oom_lock */
+	ub->ub_parms[UB_OOMGUARPAGES].failcnt++;
+
+	if (ub_ratelimit(&ri))
+		show_mem();
+}
+/* Release oom_lock, which ub_oom_lock() leaves held when it returns 0. */
+void ub_oom_unlock(void)
+{
+	spin_unlock(&oom_lock);
+}
+/* @tsk has exited: reset the kill counter, bump the generation, wake waiters. */
+void ub_oom_task_dead(struct task_struct *tsk)
+{
+	spin_lock(&oom_lock);
+	oom_kill_counter = 0;
+	oom_generation++;
+
+	printk("OOM killed process %s (pid=%d, ve=%d) exited, "
+			"free=%u gen=%d.\n",
+			tsk->comm, tsk->pid, VEID(tsk->ve_task_info.owner_env),
+			nr_free_pages(), oom_generation);
+	/* if there is time to sleep in ub_oom_lock -> sleep will continue */
+	wake_up_all(&oom_wq);
+	spin_unlock(&oom_lock);
+}
+/* Pick a victim within @scope, retrying while oom_kill_process() is non-zero. */
+void ub_out_of_memory(struct user_beancounter *scope)
+{
+	struct user_beancounter *ub;
+	struct task_struct *p;
+
+	cpuset_lock();
+	spin_lock(&oom_lock);
+	ub_clear_oom();
+	ub = get_beancounter(scope);
+
+	read_lock(&tasklist_lock);
+retry:
+	p = oom_select_bad_process(ub);
+	if (p == NULL || PTR_ERR(p) == -1UL)
+		goto unlock;
+
+	if (oom_kill_process(p, "UB Out of memory"))
+		goto retry;
+
+unlock:
+	/* drop the reference on every path, also when no victim was found */
+	put_beancounter(ub);
+	read_unlock(&tasklist_lock);
+	spin_unlock(&oom_lock);
+	cpuset_unlock();
+}
+EXPORT_SYMBOL(ub_out_of_memory);
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/ub_page_bc.c linux-2.6.16.46-0.12-027test011/kernel/ub/ub_page_bc.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/ub_page_bc.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/ub_page_bc.c	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,437 @@
+/*
+ *  kernel/ub/ub_page_bc.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_vmpages.h>
+#include <ub/ub_page.h>
+#include <ub/io_acct.h>
+
+static kmem_cache_t *pb_cachep;	/* cache used by pb_alloc() and pb_free() */
+spinlock_t pb_lock = SPIN_LOCK_UNLOCKED;	/* held around pb hash/list updates */
+static struct page_beancounter **pb_hash_table;	/* buckets chained by next_hash */
+static unsigned int pb_hash_mask;	/* mask applied in pb_hash() */
+
+/*
+ * Auxiliary stuff
+ */
+/* Page beancounter that follows @p on its page_list. */
+static inline struct page_beancounter *next_page_pb(struct page_beancounter *p)
+{
+	return list_entry(p->page_list.next, struct page_beancounter,
+			page_list);
+}
+/* Page beancounter that precedes @p on its page_list. */
+static inline struct page_beancounter *prev_page_pb(struct page_beancounter *p)
+{
+	return list_entry(p->page_list.prev, struct page_beancounter,
+			page_list);
+}
+
+/*
+ * Held pages manipulation
+ */
+static inline void set_held_pages(struct user_beancounter *bc)
+{
+	/* physpages, oomguarpages and privvm all depend on ub_held_pages */
+	__ub_update_physpages(bc);
+	__ub_update_oomguarpages(bc);
+	__ub_update_privvm(bc);
+}
+/* Subtract @value from @ub's ub_held_pages and refresh what depends on it. */
+static inline void do_dec_held_pages(struct user_beancounter *ub, int value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_held_pages -= value;
+	set_held_pages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+/* Apply do_dec_held_pages() to @ub and to every parent above it. */
+static void dec_held_pages(struct user_beancounter *ub, int value)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_dec_held_pages(ub, value);
+}
+/* Add @value to @ub's ub_held_pages and refresh what depends on it. */
+static inline void do_inc_held_pages(struct user_beancounter *ub, int value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_held_pages += value;
+	set_held_pages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+/* Apply do_inc_held_pages() to @ub and to every parent above it. */
+static void inc_held_pages(struct user_beancounter *ub, int value)
+{
+	for (; ub != NULL; ub = ub->parent)
+		do_inc_held_pages(ub, value);
+}
+
+/*
+ * Alloc - free
+ */
+/* Allocate a page beancounter into *@pbc; returns non-zero on failure. */
+inline int pb_alloc(struct page_beancounter **pbc)
+{
+	*pbc = kmem_cache_alloc(pb_cachep, GFP_KERNEL);
+	if (*pbc != NULL) {
+		(*pbc)->next_hash = NULL;
+		(*pbc)->pb_magic = PB_MAGIC;
+	}
+	return (*pbc == NULL);
+}
+/* Give *@pb back to the cache and clear the caller's pointer. */
+inline void pb_free(struct page_beancounter **pb)
+{
+	if (*pb == NULL)
+		return;
+	kmem_cache_free(pb_cachep, *pb);
+	*pb = NULL;
+}
+/* Free a next_hash-linked list; a PBC_COPY_SAME marker is left untouched. */
+void pb_free_list(struct page_beancounter **p_pb)
+{
+	struct page_beancounter *cur, *next;
+
+	cur = *p_pb;
+	if (cur == PBC_COPY_SAME)
+		return;
+
+	/* remember the successor first: pb_free() clears its argument */
+	for (; cur != NULL; cur = next) {
+		next = cur->next_hash;
+		pb_free(&cur);
+	}
+	*p_pb = NULL;
+}
+
+/*
+ * head -> <new objs> -> <old objs> -> ...
+ */
+static int __alloc_list(struct page_beancounter **head, int num)
+{
+	struct page_beancounter *fresh;
+
+	/* push @num new objects onto the front; -1 as soon as one fails */
+	for (; num > 0; num--) {
+		if (pb_alloc(&fresh))
+			return -1;
+		fresh->next_hash = *head;
+		*head = fresh;
+	}
+
+	return num;
+}
+
+/*
+ * Ensure that the list contains at least num elements.
+ * p_pb points to an initialized list, may be of the zero length.
+ *
+ * mm->page_table_lock should be held
+ */
+int pb_alloc_list(struct page_beancounter **p_pb, int num)
+{
+	struct page_beancounter *list;
+	/* count off what is already on the list; num ends up as the shortfall */
+	for (list = *p_pb; list != NULL && num; list = list->next_hash, num--);
+	if (!num)
+		return 0;
+	/* NOTE(review): pb_alloc() uses GFP_KERNEL - confirm the lock rule above */
+	/*
+	 *  *p_pb(after)       *p_pb (before)
+	 *     \                  \
+	 *     <new objs> -...-> <old objs> -> ...
+	 */
+	if (__alloc_list(p_pb, num) < 0)
+		goto nomem;
+	return 0;
+	/* failure releases the whole list, the old elements included */
+nomem:
+	pb_free_list(p_pb);
+	return -ENOMEM;
+}
+
+/*
+ * Allocates a page_beancounter for each
+ * user_beancounter in a hash
+ */
+int pb_alloc_all(struct page_beancounter **pbs)
+{
+	int need_alloc;
+	struct user_beancounter *ub;
+
+	need_alloc = 0;
+	rcu_read_lock();
+	for_each_beancounter(ub)
+		need_alloc++;
+	rcu_read_unlock();
+	/* the count is a snapshot: allocation happens outside the RCU section */
+	if (!__alloc_list(pbs, need_alloc))
+		return 0;
+	/* __alloc_list() failed: release whatever is on the list */
+	pb_free_list(pbs);
+	return -ENOMEM;
+}
+
+/*
+ * Hash routines
+ */
+/* Bucket index for the (@ub, @page) pair: pfn plus shifted uid, masked. */
+static inline int pb_hash(struct user_beancounter *ub, struct page *page)
+{
+	return (page_to_pfn(page) + (ub->ub_uid << 10)) & pb_hash_mask;
+}
+/* Bind @p to (@page, @ub), taking a reference on @ub, and put it first in
+ * bucket @hash.  pb_lock should be held. */
+static inline void insert_pb(struct page_beancounter *p, struct page *page,
+		struct user_beancounter *ub, int hash)
+{
+	p->page = page;
+	p->ub = get_beancounter(ub);
+	p->next_hash = pb_hash_table[hash];
+	pb_hash_table[hash] = p;
+	inc_pbc_count(ub);
+}
+
+/*
+ * Heart
+ */
+/* Bump the count of the existing (@page, @bc) entry; -1 if there is none. */
+static int __pb_dup_ref(struct page *page, struct user_beancounter *bc,
+		int hash)
+{
+	struct page_beancounter *p;
+
+	for (p = pb_hash_table[hash];
+			p != NULL && (p->page != page || p->ub != bc);
+			p = p->next_hash);
+	if (p == NULL)
+		return -1;
+
+	PB_COUNT_INC(p->refcount);
+	return 0;
+}
+/* Attach the first object of *@ppb to @page for @bc and adjust the shares. */
+static void __pb_add_ref(struct page *page, struct user_beancounter *bc,
+		struct page_beancounter **ppb, int hash)
+{
+	struct page_beancounter *head, *p, **hp;
+	int shift;
+
+	p = *ppb;
+	*ppb = p->next_hash;
+	/* p is off the caller's list now: hash it, then link it to the page */
+	insert_pb(p, page, bc, hash);
+	hp = page_pblist(page);
+	head = *hp;
+
+	if (head != NULL) {
+		/*
+		 * Move the first element to the end of the list.
+		 * List head (pb_head) is set to the next entry.
+		 * Note that this code works even if head is the only element
+		 * on the list (because it's cyclic).
+		 */
+		BUG_ON(head->pb_magic != PB_MAGIC);
+		*hp = next_page_pb(head);
+		PB_SHIFT_INC(head->refcount);
+		shift = PB_SHIFT_GET(head->refcount);
+		/*
+		 * Update user beancounter, the share of head has been changed.
+		 * Note that the shift counter is taken after increment.
+		 */
+		dec_held_pages(head->ub, UB_PAGE_WEIGHT >> shift);
+		/* add the new page beancounter to the end of the list */
+		head = *hp;
+		list_add_tail(&p->page_list, &head->page_list);
+	} else {
+		*hp = p;
+		shift = 0;
+		INIT_LIST_HEAD(&p->page_list);
+	}
+
+	p->refcount = PB_REFCOUNT_MAKE(shift, 1);
+	/* update user beancounter for the new page beancounter */
+	inc_held_pages(bc, UB_PAGE_WEIGHT >> shift);
+}
+
+void pb_add_ref(struct page *page, struct mm_struct *mm,
+		struct page_beancounter **p_pb)
+{
+	int hash;
+	struct user_beancounter *bc;
+
+	bc = mm->mm_ub;
+	if (bc == NULL)		/* mm without a beancounter: nothing to account */
+		return;
+
+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
+		return;		/* non-anonymous shmem pages are skipped here */
+
+	hash = pb_hash(bc, page);
+
+	spin_lock(&pb_lock);
+	if (__pb_dup_ref(page, bc, hash))	/* no (page, bc) entry yet */
+		__pb_add_ref(page, bc, p_pb, hash);
+	spin_unlock(&pb_lock);
+}
+
+void pb_dup_ref(struct page *page, struct mm_struct *mm,
+		struct page_beancounter **p_pb)
+{
+	int hash;
+	struct user_beancounter *bc;
+
+	bc = mm->mm_ub;
+	if (bc == NULL)		/* mm without a beancounter: nothing to account */
+		return;
+
+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
+		return;		/* non-anonymous shmem pages are skipped here */
+
+	hash = pb_hash(bc, page);
+
+	spin_lock(&pb_lock);
+	if (*page_pblist(page) == NULL)
+		/*
+		 * pages like ZERO_PAGE must not be accounted in pbc
+		 * so on fork we just skip them
+		 */
+		goto out_unlock;
+
+	if (unlikely(*p_pb != PBC_COPY_SAME))	/* caller supplied fresh entries */
+		__pb_add_ref(page, bc, p_pb, hash);
+	else if (unlikely(__pb_dup_ref(page, bc, hash)))
+		WARN_ON(1);	/* PBC_COPY_SAME but no (page, bc) entry found */
+out_unlock:
+	spin_unlock(&pb_lock);
+}
+
+void pb_remove_ref(struct page *page, struct mm_struct *mm)
+{
+	int hash;
+	struct user_beancounter *bc;
+	struct page_beancounter *p, **q, *f;
+	int shift, shiftt;
+
+	bc = mm->mm_ub;
+	if (bc == NULL)		/* mm without a beancounter: nothing was accounted */
+		return;
+
+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
+		return;		/* same filter as pb_add_ref()/pb_dup_ref() */
+
+	hash = pb_hash(bc, page);
+
+	spin_lock(&pb_lock);
+	for (q = pb_hash_table + hash, p = *q;
+			p != NULL && (p->page != page || p->ub != bc);
+			q = &p->next_hash, p = *q);
+	if (p == NULL)		/* no (page, bc) entry: nothing to drop */
+		goto out_unlock;
+
+	PB_COUNT_DEC(p->refcount);
+	if (PB_COUNT_GET(p->refcount))
+		/*
+		 * More references from the same user beancounter exist.
+		 * Nothing needs to be done.
+		 */
+		goto out_unlock;
+
+	/* remove from the hash list */
+	f = p;			/* f is the entry freed at out_free */
+	*q = p->next_hash;
+
+	shift = PB_SHIFT_GET(p->refcount);
+
+	dec_held_pages(p->ub, UB_PAGE_WEIGHT >> shift);
+
+	q = page_pblist(page);	/* q now points at the page's list head slot */
+	if (*q == p) {
+		if (list_empty(&p->page_list)) {
+			*q = NULL;	/* p was the page's only entry */
+			goto out_free;
+		}
+
+		*q = next_page_pb(p);
+	}
+	list_del(&p->page_list);
+
+	/* Now balance the list.  Move the tail and adjust its shift counter. */
+	p = prev_page_pb(*q);
+	shiftt = PB_SHIFT_GET(p->refcount);
+	*q = p;
+	PB_SHIFT_DEC(p->refcount);
+
+	inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
+
+	/*
+	 * If the shift counter of the moved beancounter is different from the
+	 * removed one's, repeat the procedure for one more tail beancounter
+	 */
+	if (shiftt > shift) {
+		p = prev_page_pb(*q);
+		*q = p;
+		PB_SHIFT_DEC(p->refcount);
+		inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
+	}
+out_free:
+	dec_pbc_count(f->ub);
+	spin_unlock(&pb_lock);
+
+	put_beancounter(f->ub);	/* matches get_beancounter() in insert_pb() */
+	pb_free(&f);
+	return;
+
+out_unlock:
+	spin_unlock(&pb_lock);
+}
+
+struct user_beancounter *pb_grab_page_ub(struct page *page)
+{
+	struct page_beancounter *head;
+	struct user_beancounter *owner = ERR_PTR(-EINVAL);
+
+	spin_lock(&pb_lock);
+	head = *page_pblist(page);
+	if (head != NULL)	/* take a reference on the list head's beancounter */
+		owner = get_beancounter(head->ub);
+	spin_unlock(&pb_lock);
+	return owner;
+}
+
+void __init ub_init_pbc(void)
+{
+	unsigned long hash_size;	/* wanted size first, then bucket count */
+
+	pb_cachep = kmem_cache_create("page_beancounter",
+			sizeof(struct page_beancounter), 0,
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+	for (hash_size = num_physpages >> 2, pb_hash_mask = 1;
+		(hash_size & pb_hash_mask) != hash_size;
+		pb_hash_mask = (pb_hash_mask << 1) + 1);
+	hash_size = pb_hash_mask + 1;	/* mask is all ones: a power of two */
+	printk(KERN_INFO "Page beancounter hash is %lu entries.\n", hash_size);
+	pb_hash_table = vmalloc(hash_size * sizeof(struct page_beancounter *));
+	if (pb_hash_table == NULL)	/* memset() below would oops on NULL */
+		panic("Can't allocate page beancounter hash");
+	memset(pb_hash_table, 0, hash_size * sizeof(struct page_beancounter *));
+	ub_init_io(pb_cachep);
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/ub_pages.c linux-2.6.16.46-0.12-027test011/kernel/ub/ub_pages.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/ub_pages.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/ub_pages.c	2007-08-28 17:35:33.000000000 +0400
@@ -0,0 +1,549 @@
+/*
+ *  kernel/ub/ub_pages.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/virtinfo.h>
+#include <linux/module.h>
+#include <linux/shmem_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+#include <ub/proc.h>
+
+static inline unsigned long pages_in_pte_range(struct vm_area_struct *vma,
+		pmd_t *pmd, unsigned long addr, unsigned long end,
+		unsigned long *ret)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	do {
+		if (!pte_none(*pte) && pte_present(*pte))
+			(*ret)++;	/* count every present pte in [addr, end) */
+	} while (pte++, addr += PAGE_SIZE, (addr != end));
+	pte_unmap_unlock(pte - 1, ptl);	/* pte was advanced one past the last entry */
+
+	return addr;
+}
+
+static inline unsigned long pages_in_pmd_range(struct vm_area_struct *vma,
+		pud_t *pud, unsigned long addr, unsigned long end,
+		unsigned long *ret)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;	/* still runs the while () step: pmd++, addr = next */
+		next = pages_in_pte_range(vma, pmd, addr, next, ret);
+	} while (pmd++, addr = next, (addr != end));
+
+	return addr;
+}
+
+static inline unsigned long pages_in_pud_range(struct vm_area_struct *vma,
+		pgd_t *pgd, unsigned long addr, unsigned long end,
+		unsigned long *ret)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;	/* still runs the while () step: pud++, addr = next */
+		next = pages_in_pmd_range(vma, pud, addr, next, ret);
+	} while (pud++, addr = next, (addr != end));
+
+	return addr;
+}
+
+unsigned long pages_in_vma_range(struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	unsigned long ret;	/* number of present ptes found in [addr, end) */
+
+	ret = 0;
+	BUG_ON(addr >= end);	/* an empty range would never end the do/while */
+	pgd = pgd_offset(vma->vm_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;	/* still runs the while () step: pgd++, addr = next */
+		next = pages_in_pud_range(vma, pgd, addr, next, &ret);
+	} while (pgd++, addr = next, (addr != end));
+	return ret;
+}
+
+void fastcall __ub_update_physpages(struct user_beancounter *ub)
+{	/* physpages.held = weighted held pages + ub_tmpfs_respages */
+	ub->ub_parms[UB_PHYSPAGES].held =
+		(ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT) + ub->ub_tmpfs_respages;
+	ub_adjust_maxheld(ub, UB_PHYSPAGES);
+}
+
+void fastcall __ub_update_oomguarpages(struct user_beancounter *ub)
+{	/* oomguarpages.held = physpages.held + ub_swap_pages */
+	ub->ub_parms[UB_OOMGUARPAGES].held =
+		ub->ub_swap_pages + ub->ub_parms[UB_PHYSPAGES].held;
+	ub_adjust_maxheld(ub, UB_OOMGUARPAGES);
+}
+
+void fastcall __ub_update_privvm(struct user_beancounter *ub)
+{
+	/* privvmpages.held = shmpages.held + unused privvm + weighted held */
+	ub->ub_parms[UB_PRIVVMPAGES].held =
+		ub->ub_parms[UB_SHMPAGES].held + ub->ub_unused_privvmpages
+		+ (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT);
+	ub_adjust_maxheld(ub, UB_PRIVVMPAGES);
+}
+
+static inline int __charge_privvm_locked(struct user_beancounter *ub,
+		unsigned long s, enum ub_severity strict)
+{
+	if (__charge_beancounter_locked(ub, UB_PRIVVMPAGES, s, strict) >= 0) {
+		ub->ub_unused_privvmpages += s;	/* recorded as "unused" privvm */
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+static void __unused_privvm_dec_locked(struct user_beancounter *ub,
+		long size)
+{
+	/* never subtract more than is recorded: warn and clamp instead */
+	if (ub->ub_unused_privvmpages < size) {
+		uncharge_warn(ub, UB_UNUSEDPRIVVM,
+				size, ub->ub_unused_privvmpages);
+		size = ub->ub_unused_privvmpages;
+	}
+	ub->ub_unused_privvmpages -= size;
+	__ub_update_privvm(ub);	/* refresh the derived UB_PRIVVMPAGES.held */
+}
+
+void __ub_unused_privvm_dec(struct mm_struct *mm, long size)
+{
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)		/* mm without a beancounter: nothing to adjust */
+		return;
+
+	ub = top_beancounter(ub);	/* the counter lives in top_beancounter(ub) */
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__unused_privvm_dec_locked(ub, size);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_unused_privvm_sub(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long count)
+{	/* only vmas that VM_UB_PRIVATE() accepts carry unused privvm */
+	if (VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
+		__ub_unused_privvm_dec(mm, count);
+}
+
+void ub_unused_privvm_add(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long size)
+{
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = mm->mm_ub;
+	if (ub == NULL || !VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
+		return;		/* unaccounted mm, or a vma VM_UB_PRIVATE() rejects */
+
+	ub = top_beancounter(ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_unused_privvmpages += size;	/* NOTE(review): no __ub_update_privvm() here, unlike the dec path - confirm */
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+int ub_protected_charge(struct mm_struct *mm, unsigned long size,
+		unsigned long newflags, struct vm_area_struct *vma)
+{
+	unsigned long oldflags, irqflags;
+	struct user_beancounter *ub;
+	int ret;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)
+		return PRIVVM_NO_CHARGE;
+
+	/* only a change of the VM_WRITE bit can alter the accounting */
+	oldflags = vma->vm_flags;
+	if (((newflags ^ oldflags) & VM_WRITE) == 0)
+		return PRIVVM_NO_CHARGE;
+
+	/* not private even when writable: never charged */
+	if (!VM_UB_PRIVATE(newflags | VM_WRITE, vma->vm_file))
+		return PRIVVM_NO_CHARGE;
+
+	/* write permission is being dropped: nothing new to charge */
+	if (oldflags & VM_WRITE)
+		return PRIVVM_TO_SHARED;
+
+	/* the mapping becomes writable: charge top_beancounter(ub) */
+	ub = top_beancounter(ub);
+	spin_lock_irqsave(&ub->ub_lock, irqflags);
+	ret = PRIVVM_TO_PRIVATE;
+	if (__charge_privvm_locked(ub, size, UB_SOFT) < 0)
+		ret = PRIVVM_ERROR;
+	spin_unlock_irqrestore(&ub->ub_lock, irqflags);
+	return ret;
+}
+
+int ub_memory_charge(struct mm_struct *mm, unsigned long size,
+		unsigned vm_flags, struct file *vm_file, int sv)
+{
+	struct user_beancounter *ub, *ubl;
+	unsigned long flags;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)		/* unaccounted mm: report success */
+		return 0;
+
+	size >>= PAGE_SHIFT;	/* bytes -> pages */
+	if (size > UB_MAXVALUE)
+		return -EINVAL;
+
+	BUG_ON(sv != UB_SOFT && sv != UB_HARD);
+
+	if (vm_flags & VM_LOCKED) {
+		if (charge_beancounter(ub, UB_LOCKEDPAGES, size, sv))
+			goto out_err;
+	}
+	if (VM_UB_PRIVATE(vm_flags, vm_file)) {
+		ubl = top_beancounter(ub);
+		spin_lock_irqsave(&ubl->ub_lock, flags);
+		if (__charge_privvm_locked(ubl, size, sv))
+			goto out_private;
+		spin_unlock_irqrestore(&ubl->ub_lock, flags);
+	}
+	return 0;
+
+out_private:
+	spin_unlock_irqrestore(&ubl->ub_lock, flags);
+	if (vm_flags & VM_LOCKED)	/* roll back the locked-pages charge */
+		uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
+out_err:
+	return -ENOMEM;
+}
+
+void ub_memory_uncharge(struct mm_struct *mm, unsigned long size,
+		unsigned vm_flags, struct file *vm_file)
+{
+	struct user_beancounter *ub;
+	unsigned long flags;
+
+	ub = mm->mm_ub;
+	if (ub == NULL)		/* unaccounted mm: nothing was charged */
+		return;
+
+	size >>= PAGE_SHIFT;	/* bytes -> pages */
+
+	if (vm_flags & VM_LOCKED)
+		uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
+	if (VM_UB_PRIVATE(vm_flags, vm_file)) {
+		ub = top_beancounter(ub);
+		spin_lock_irqsave(&ub->ub_lock, flags);
+		__unused_privvm_dec_locked(ub, size);
+		spin_unlock_irqrestore(&ub->ub_lock, flags);
+	}
+}
+
+int ub_locked_charge(struct mm_struct *mm, unsigned long size)
+{
+	struct user_beancounter *ub = mm->mm_ub;
+	unsigned long pages = size >> PAGE_SHIFT;
+
+	/* an mm without a beancounter is not accounted: report success */
+	if (ub == NULL)
+		return 0;
+
+	return charge_beancounter(ub, UB_LOCKEDPAGES, pages, UB_HARD);
+}
+
+void ub_locked_uncharge(struct mm_struct *mm, unsigned long size)
+{
+	struct user_beancounter *ub = mm->mm_ub;
+
+	/* an mm without a beancounter is not accounted */
+	if (ub == NULL)
+		return;
+
+	uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
+}
+
+int ub_lockedshm_charge(struct shmem_inode_info *shi, unsigned long size)
+{
+	struct user_beancounter *ub = shi->shmi_ub;
+	unsigned long pages = size >> PAGE_SHIFT;
+
+	/* a shmem inode without a beancounter is not accounted */
+	if (ub == NULL)
+		return 0;
+
+	return charge_beancounter(ub, UB_LOCKEDPAGES, pages, UB_HARD);
+}
+
+void ub_lockedshm_uncharge(struct shmem_inode_info *shi, unsigned long size)
+{
+	struct user_beancounter *ub = shi->shmi_ub;
+
+	/* a shmem inode without a beancounter is not accounted */
+	if (ub == NULL)
+		return;
+
+	uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
+}
+
+
+static inline void do_ub_tmpfs_respages_inc(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_tmpfs_respages++;
+	__ub_update_physpages(ub);	/* physpages depends on ub_tmpfs_respages */
+	__ub_update_oomguarpages(ub);	/* and oomguarpages on physpages */
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_tmpfs_respages_inc(struct shmem_inode_info *shi)
+{
+	struct user_beancounter *ub = shi->shmi_ub;
+
+	for (; ub != NULL; ub = ub->parent)	/* owner, then its ancestors */
+		do_ub_tmpfs_respages_inc(ub);
+}
+
+static inline void do_ub_tmpfs_respages_sub(struct user_beancounter *ub,
+		unsigned long size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	/* never subtract more than is recorded: warn and clamp instead */
+	if (ub->ub_tmpfs_respages < size) {
+		uncharge_warn(ub, UB_TMPFSPAGES,
+				size, ub->ub_tmpfs_respages);
+		size = ub->ub_tmpfs_respages;
+	}
+	ub->ub_tmpfs_respages -= size;
+	/* refresh the derived values that depend on ub_tmpfs_respages */
+	__ub_update_physpages(ub);
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_tmpfs_respages_sub(struct shmem_inode_info *shi,
+		unsigned long size)
+{
+	struct user_beancounter *ub = shi->shmi_ub;
+
+	for (; ub != NULL; ub = ub->parent)	/* owner, then its ancestors */
+		do_ub_tmpfs_respages_sub(ub, size);
+}
+
+int ub_shmpages_charge(struct shmem_inode_info *shi, unsigned long size)
+{
+	int ret;
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = shi->shmi_ub;
+	if (ub == NULL)		/* unaccounted inode: report success */
+		return 0;
+
+	ub = top_beancounter(ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ret = __charge_beancounter_locked(ub, UB_SHMPAGES, size, UB_HARD);
+	if (ret == 0)		/* privvmpages.held includes shmpages.held */
+		__ub_update_privvm(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+	return ret;
+}
+
+void ub_shmpages_uncharge(struct shmem_inode_info *shi, unsigned long size)
+{
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	ub = shi->shmi_ub;
+	if (ub == NULL)		/* unaccounted inode: nothing was charged */
+		return;
+
+	ub = top_beancounter(ub);
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	__uncharge_beancounter_locked(ub, UB_SHMPAGES, size);
+	__ub_update_privvm(ub);	/* privvmpages.held includes shmpages.held */
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+#ifdef CONFIG_USER_SWAP_ACCOUNTING
+static inline void do_ub_swapentry_inc(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_swap_pages++;
+	__ub_update_oomguarpages(ub);	/* oomguarpages depends on ub_swap_pages */
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_swapentry_inc(struct swap_info_struct *si, pgoff_t num,
+		struct user_beancounter *ub)
+{
+	si->swap_ubs[num] = get_beancounter(ub);	/* slot @num now refers to ub */
+	for (; ub != NULL; ub = ub->parent)	/* ub, then its ancestors */
+		do_ub_swapentry_inc(ub);
+}
+EXPORT_SYMBOL(ub_swapentry_inc);
+
+static inline void do_ub_swapentry_dec(struct user_beancounter *ub)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	if (ub->ub_swap_pages <= 0)	/* would underflow: warn, leave it alone */
+		uncharge_warn(ub, UB_SWAPPAGES, 1, ub->ub_swap_pages);
+	else
+		ub->ub_swap_pages--;
+	__ub_update_oomguarpages(ub);
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
+
+void ub_swapentry_dec(struct swap_info_struct *si, pgoff_t num)
+{
+	struct user_beancounter *ub, *ubp;
+
+	ub = si->swap_ubs[num];
+	si->swap_ubs[num] = NULL;	/* slot @num no longer has an owner */
+	for (ubp = ub; ubp != NULL; ubp = ubp->parent)
+		do_ub_swapentry_dec(ubp);
+	put_beancounter(ub);	/* matches get_beancounter() in ub_swapentry_inc() */
+}
+EXPORT_SYMBOL(ub_swapentry_dec);
+
+int ub_swap_init(struct swap_info_struct *si, pgoff_t num)
+{
+	unsigned long bytes = num * sizeof(struct user_beancounter *);
+	struct user_beancounter **table = vmalloc(bytes);
+
+	if (table == NULL)
+		return -ENOMEM;
+	/* @num beancounter pointers, all initially NULL */
+	memset(table, 0, bytes);
+	si->swap_ubs = table;
+	return 0;
+}
+
+void ub_swap_fini(struct swap_info_struct *si)
+{
+	if (si->swap_ubs == NULL)	/* nothing allocated, or already freed */
+		return;
+	vfree(si->swap_ubs);
+	si->swap_ubs = NULL;
+}
+#endif
+
+#ifdef CONFIG_PROC_FS
+static int bc_vmaux_show(struct seq_file *f, void *v)
+{
+	struct user_beancounter *ub;
+	unsigned long swap, unmap;
+	int i;
+
+	ub = seq_beancounter(f);
+
+	swap = unmap = 0;
+	for_each_online_cpu(i) {	/* sum the per-cpu counters */
+		swap += per_cpu_ptr(ub->ub_percpu, i)->swapin;
+		unmap += per_cpu_ptr(ub->ub_percpu, i)->unmap;
+	}
+
+	seq_printf(f, bc_proc_lu_fmt, ub_rnames[UB_UNUSEDPRIVVM],
+			ub->ub_unused_privvmpages);
+	seq_printf(f, bc_proc_lu_fmt, ub_rnames[UB_TMPFSPAGES],
+			ub->ub_tmpfs_respages);
+	seq_printf(f, bc_proc_lu_fmt, ub_rnames[UB_SWAPPAGES],
+			ub->ub_swap_pages);
+
+	seq_printf(f, bc_proc_lu_fmt, "swapin", swap);
+	seq_printf(f, bc_proc_lu_fmt, "unmap", unmap);
+	return 0;
+}
+static struct bc_proc_entry bc_vmaux_entry = {	/* "vmaux" file, shown by bc_vmaux_show() */
+	.name = "vmaux",
+	.u.show = bc_vmaux_show,
+};
+
+static int __init bc_vmaux_init(void)
+{
+	bc_register_proc_entry(&bc_vmaux_entry);
+	return 0;
+}
+
+late_initcall(bc_vmaux_init);
+#endif
+
+static int vmguar_enough_memory(struct vnotifier_block *self,
+		unsigned long event, void *arg, int old_ret)
+{
+	struct user_beancounter *ub;
+
+	if (event != VIRTINFO_ENOUGHMEM)
+		return old_ret;
+	/*
+	 * Kernel threads (no mm) and mms without a beancounter are not
+	 * our business.  Added so that aufsd runs smoothly over ramfs.
+	 */
+	if (!current->mm || !current->mm->mm_ub)
+		return NOTIFY_DONE;
+
+	for (ub = current->mm->mm_ub; ub->parent != NULL; ub = ub->parent);
+	if (ub->ub_parms[UB_PRIVVMPAGES].held >
+			ub->ub_parms[UB_VMGUARPAGES].barrier)
+		return old_ret;	/* above the vmguarpages barrier: keep the verdict */
+
+	return NOTIFY_OK;
+}
+
+static struct vnotifier_block vmguar_notifier_block = {
+	.notifier_call = vmguar_enough_memory
+};
+
+static int __init init_vmguar_notifier(void)
+{	/* hook vmguar_enough_memory() into the VITYPE_GENERAL chain */
+	virtinfo_notifier_register(VITYPE_GENERAL, &vmguar_notifier_block);
+	return 0;
+}
+
+static void __exit fini_vmguar_notifier(void)
+{	/* undo init_vmguar_notifier() */
+	virtinfo_notifier_unregister(VITYPE_GENERAL, &vmguar_notifier_block);
+}
+
+module_init(init_vmguar_notifier);
+module_exit(fini_vmguar_notifier);
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/ub_proc.c linux-2.6.16.46-0.12-027test011/kernel/ub/ub_proc.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/ub_proc.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/ub_proc.c	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,676 @@
+/*
+ *  kernel/ub/ub_proc.c
+ *
+ *  Copyright (C) 2006 OpenVZ. SWsoft Inc.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_page.h>
+#include <ub/proc.h>
+
+/* Generic row formats: name column, then right-aligned value column(s) */
+#if BITS_PER_LONG == 32
+const char *bc_proc_lu_fmt = "\t%-20s %10lu\n";
+const char *bc_proc_lu_lfmt = "\t%-20s %21lu\n";
+const char *bc_proc_llu_fmt = "\t%-20s %21llu\n";
+const char *bc_proc_lu_lu_fmt = "\t%-20s %10lu %10lu\n";
+#else
+const char *bc_proc_lu_fmt = "\t%-20s %21lu\n";
+const char *bc_proc_lu_lfmt = "\t%-20s %21lu\n";
+const char *bc_proc_llu_fmt = "\t%-20s %21llu\n";
+const char *bc_proc_lu_lu_fmt = "\t%-20s %21lu %21lu\n";
+#endif
+
+#if BITS_PER_LONG == 32	/* column width follows the size of unsigned long */
+static const char *head_fmt = "%10s  %-12s %10s %10s %10s %10s %10s\n";
+static const char *res_fmt = "%10s  %-12s %10lu %10lu %10lu %10lu %10lu\n";
+#else
+static const char *head_fmt = "%10s  %-12s %20s %20s %20s %20s %20s\n";
+static const char *res_fmt = "%10s  %-12s %20lu %20lu %20lu %20lu %20lu\n";
+#endif
+
+static void ub_show_res(struct seq_file *f, struct user_beancounter *ub,
+		int r, int show_uid)
+{
+	char label[64] = "";
+	int n;
+
+	/* only the first resource row carries the beancounter id and ':' */
+	if (show_uid && r == 0) {
+		n = print_ub_uid(ub, label, sizeof(label) - 2);
+		label[n] = ':';
+		label[n + 1] = '\0';
+	}
+
+	seq_printf(f, res_fmt, label, ub_rnames[r],
+			ub->ub_parms[r].held,
+			ub->ub_parms[r].maxheld,
+			ub->ub_parms[r].barrier,
+			ub->ub_parms[r].limit,
+			ub->ub_parms[r].failcnt);
+}
+
+static void __show_resources(struct seq_file *f, struct user_beancounter *ub,
+		int show_uid)
+{
+	int i;
+
+	for (i = 0; i < UB_RESOURCES_COMPAT; i++)
+		if (strcmp(ub_rnames[i], "dummy") != 0)	/* skip "dummy" slots */
+			ub_show_res(f, ub, i, show_uid);
+
+	for (i = UB_RESOURCES_COMPAT; i < UB_RESOURCES; i++)
+		ub_show_res(f, ub, i, show_uid);	/* the rest is shown as is */
+}
+
+static int bc_resources_show(struct seq_file *f, void *v)
+{
+	__show_resources(f, seq_beancounter(f), 0);	/* 0: no uid column */
+	return 0;
+}
+
+static struct bc_proc_entry bc_resources_entry = {
+	.name = "resources",
+	.u.show = bc_resources_show,
+};
+
+static int bc_debug_show(struct seq_file *f, void *v)
+{
+	struct user_beancounter *ub;
+	char buf[64];
+
+	ub = seq_beancounter(f);
+	print_ub_uid(ub, buf, sizeof(buf));
+	seq_printf(f, "uid: %s\n", buf);
+	seq_printf(f, "ref: %d\n", atomic_read(&ub->ub_refcount));
+
+	seq_printf(f, "bc: %p\n", ub);	/* raw kernel pointers follow */
+	seq_printf(f, "par: %p\n", ub->parent);
+	seq_printf(f, "priv: %p\n", ub->private_data);
+	return 0;
+}
+
+static struct bc_proc_entry bc_debug_entry = {	/* registered only under CONFIG_UBC_DEBUG */
+	.name = "debug",
+	.u.show = bc_debug_show,
+};
+
+static int ub_show(struct seq_file *f, void *v)
+{
+	int i;
+
+	for (i = 0; i < UB_RESOURCES_COMPAT; i++)	/* "dummy" rows included */
+		ub_show_res(f, (struct user_beancounter *)v, i, 1);
+	return 0;
+}
+
+static int res_show(struct seq_file *f, void *v)
+{
+	__show_resources(f, (struct user_beancounter *)v, 1);	/* 1: with uid */
+	return 0;
+}
+
+static int ub_accessible(struct user_beancounter *exec,
+		struct user_beancounter *target)
+{
+	struct user_beancounter *viewer = top_beancounter(exec);
+	struct user_beancounter *owner = top_beancounter(target);
+
+	if (viewer == get_ub0())	/* ub0 may look at every beancounter */
+		return 1;
+	return viewer == owner;
+}
+
+static void ub_show_header(struct seq_file *f)
+{	/* version line plus column titles; called by ub_start() at pos 0 */
+	seq_printf(f, "Version: 2.5\n");
+	seq_printf(f, head_fmt, "uid", "resource",
+			"held", "maxheld", "barrier", "limit", "failcnt");
+}
+
+static void *ub_start(struct seq_file *f, loff_t *ppos)
+{
+	struct user_beancounter *ub;
+	struct user_beancounter *exec_ub; 
+	unsigned long pos;
+
+	pos = *ppos;
+	if (pos == 0)
+		ub_show_header(f);
+
+	exec_ub = get_exec_ub();
+
+	rcu_read_lock();	/* still held on return; ub_stop() unlocks */
+	for_each_beancounter(ub) {
+		if (ub->parent != NULL)	/* only parentless beancounters */
+			continue;
+		if (!ub_accessible(exec_ub, ub))
+			continue;
+		if (pos-- == 0)	/* the pos-th accessible one */
+			return ub;
+	}
+	return NULL;
+}
+
+static void *ub_next(struct seq_file *f, void *v, loff_t *ppos)
+{
+	struct user_beancounter *ub;
+	struct list_head *entry;
+	struct user_beancounter *exec_ub;
+
+	exec_ub = get_exec_ub();
+	ub = (struct user_beancounter *)v;
+
+	entry = &ub->ub_list;	/* resume the list walk right after @v */
+
+	list_for_each_continue_rcu(entry, &ub_list_head) {
+		ub = list_entry(entry, struct user_beancounter, ub_list);
+		if (ub->parent != NULL)	/* same filters as ub_start() */
+			continue;
+		if (!ub_accessible(exec_ub, ub))
+			continue;
+
+		(*ppos)++;
+		return ub;
+	}
+	return NULL;
+}
+
+static void ub_stop(struct seq_file *f, void *v)
+{
+	rcu_read_unlock();	/* pairs with rcu_read_lock() in ub_start() */
+}
+
+static struct seq_operations ub_seq_ops = {	/* iterator opened by ub_open() */
+	.start = ub_start,
+	.next  = ub_next,
+	.stop  = ub_stop,
+	.show  = ub_show,
+};
+
+static int ub_open(struct inode *inode, struct file *filp)
+{
+	/* both DAC capabilities are required to open the table */
+	if (!capable(CAP_DAC_OVERRIDE) || !capable(CAP_DAC_READ_SEARCH))
+		return -EACCES;
+	return seq_open(filp, &ub_seq_ops);
+}
+
+static struct file_operations ub_file_operations = {	/* "user_beancounters" file */
+	.open		= ub_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct seq_operations res_seq_ops = {	/* same iterator as ub_seq_ops */
+	.start = ub_start,
+	.next  = ub_next,
+	.stop  = ub_stop,
+	.show  = res_show,	/* only the show routine differs */
+};
+
+static int res_open(struct inode *inode, struct file *filp)
+{
+	/* both DAC capabilities are required to open the table */
+	if (!capable(CAP_DAC_OVERRIDE) || !capable(CAP_DAC_READ_SEARCH))
+		return -EACCES;
+	return seq_open(filp, &res_seq_ops);
+}
+
+static struct file_operations resources_operations = {	/* used by bc_all_resources_entry */
+	.open		= res_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct bc_proc_entry bc_all_resources_entry = {	/* root-level entry */
+	.name = "resources",
+	.u.fops = &resources_operations,
+};
+
+/*
+ * Generic showing stuff
+ */
+
+static int cookies, num_entries;	/* last cookie handed out; length of bc_entries */
+static struct bc_proc_entry *bc_entries __read_mostly;	/* per-beancounter files */
+static struct bc_proc_entry *bc_root_entries __read_mostly;	/* root-level files */
+static DEFINE_SPINLOCK(bc_entries_lock);	/* taken by both register functions */
+
+void bc_register_proc_entry(struct bc_proc_entry *e)
+{
+	spin_lock(&bc_entries_lock);
+	e->cookie = ++cookies;	/* cookie later feeds bc_make_file_ino() */
+	e->next = bc_entries;	/* push onto the list head */
+	bc_entries = e;
+	num_entries++;
+	spin_unlock(&bc_entries_lock);
+}
+
+EXPORT_SYMBOL(bc_register_proc_entry);
+
+void bc_register_proc_root_entry(struct bc_proc_entry *e)
+{
+	spin_lock(&bc_entries_lock);
+	e->cookie = ++cookies;	/* same cookie sequence as bc_register_proc_entry() */
+	e->next = bc_root_entries;	/* push onto the list head */
+	bc_root_entries = e;
+	spin_unlock(&bc_entries_lock);
+}
+
+EXPORT_SYMBOL(bc_register_proc_root_entry);
+
+/*
+ * small helpers
+ */
+
+static inline int bc_make_ino(struct user_beancounter *ub)
+{
+	int ret;
+
+	ret = 0xbc000000;	/* fixed tag for beancounter directories */
+	if (ub->parent)		/* NOTE(review): shifts ub's own uid, not the parent's - confirm */
+		ret |= (ub->ub_uid << 12);
+	ret |= ub->ub_uid;
+	return ret;
+}
+
+static inline int bc_make_file_ino(struct bc_proc_entry *de)
+{
+	return 0xbe000000 + de->cookie;	/* fixed base plus the registration cookie */
+}
+
+static int bc_d_delete(struct dentry *d)
+{
+	return 1;	/* unconditionally answers 1 to the d_delete query */
+}
+
+static void bc_d_release(struct dentry *d)
+{	/* drops the beancounter that bc_lookup() stored in d_fsdata */
+	put_beancounter((struct user_beancounter *)d->d_fsdata);
+}
+
+static struct inode_operations bc_entry_iops;	/* forward declarations; */
+static struct file_operations bc_entry_fops;	/* initialised further down */
+static struct dentry_operations bc_dentry_ops = {
+	.d_delete = bc_d_delete,
+	.d_release = bc_d_release,
+};
+
+/*
+ * common directory operations' helpers
+ */
+
+static int bc_readdir(struct file *file, filldir_t filler, void *data,
+		struct user_beancounter *parent)
+{
+	int err = 0, len, ino;
+	loff_t pos, filled;
+	struct user_beancounter *ub, *prev;
+	char buf[64];
+	struct bc_proc_entry *pde;
+
+	if (!(capable(CAP_DAC_OVERRIDE) && capable(CAP_DAC_READ_SEARCH)))
+		return -EPERM;
+
+	pos = file->f_pos;	/* position 0 is ".", 1 is "..", then files, then dirs */
+	if (pos == 0) {
+		err = (*filler)(data, ".", 1, pos,
+				file->f_dentry->d_inode->i_ino, DT_DIR);
+		if (err < 0) {	/* filler refused: stop here and report success */
+			err = 0;
+			goto out;
+		}
+		pos++;
+	}
+
+	if (pos == 1) {
+		err = (*filler)(data, "..", 2, pos,
+				parent_ino(file->f_dentry), DT_DIR);
+		if (err < 0) {
+			err = 0;
+			goto out;
+		}
+		pos++;
+	}
+
+	filled = 2;	/* index of the next candidate entry */
+	for (pde = (parent == NULL ? bc_root_entries : bc_entries);
+			pde != NULL; pde = pde->next) {
+		if (filled++ < pos)	/* already emitted by an earlier call */
+			continue;
+
+		err = (*filler)(data, pde->name, strlen(pde->name), pos,
+				bc_make_file_ino(pde), DT_REG);
+		if (err < 0) {
+			err = 0;
+			goto out;
+		}
+		pos++;
+	}
+
+	rcu_read_lock();
+	prev = NULL;	/* beancounter pinned across the last unlocked section */
+	ub = list_entry(&ub_list_head, struct user_beancounter, ub_list);
+	while (1) {
+		ub = list_entry(rcu_dereference(ub->ub_list.next),
+				struct user_beancounter, ub_list);
+		if (&ub->ub_list == &ub_list_head)	/* wrapped around: done */
+			break;
+
+		if (ub->parent != parent)	/* only direct children of @parent */
+			continue;
+
+		if (filled++ < pos)
+			continue;
+
+		if (!get_beancounter_rcu(ub))	/* could not take a reference */
+			continue;
+
+		rcu_read_unlock();	/* filler runs outside the RCU section */
+		put_beancounter(prev);
+
+		len = print_ub_uid(ub, buf, sizeof(buf));
+		ino = bc_make_ino(ub);
+
+		err = (*filler)(data, buf, len, pos, ino, DT_DIR);
+		if (err < 0) {
+			err = 0;
+			put_beancounter(ub);
+			goto out;
+		}
+
+		rcu_read_lock();
+		prev = ub;	/* stays pinned until the next entry is pinned */
+		pos++;
+	}
+	rcu_read_unlock();
+	put_beancounter(prev);
+out:
+	file->f_pos = pos;
+	return err;
+}
+
+static int bc_looktest(struct inode *ino, void *data)
+{	/* iget5_locked() match: a bc directory inode bound to @data */
+	return ino->i_op == &bc_entry_iops && ino->u.generic_ip == data;
+}
+
+static int bc_lookset(struct inode *ino, void *data)
+{
+	struct user_beancounter *ub;
+
+	ub = (struct user_beancounter *)data;
+	ino->u.generic_ip = data;	/* compared again by bc_looktest() */
+	ino->i_ino = bc_make_ino(ub);
+	ino->i_fop = &bc_entry_fops;
+	ino->i_op = &bc_entry_iops;
+	ino->i_mode = S_IFDIR | S_IRUSR | S_IXUGO;
+	/* subbeancounters are not included, but who cares? */
+	ino->i_nlink = num_entries + 2;
+	ino->i_gid = 0;
+	ino->i_uid = 0;
+	return 0;
+}
+
+static struct dentry *bc_lookup(struct user_beancounter *ub, struct inode *dir,
+		struct dentry *dentry)
+{
+	struct inode *ino;
+
+	ino = iget5_locked(dir->i_sb, ub->ub_uid, bc_looktest, bc_lookset, ub);
+	if (ino == NULL)
+		goto out_put;
+
+	if (ino->i_state & I_NEW)	/* cached inodes come back unlocked */
+		unlock_new_inode(ino);
+	dentry->d_op = &bc_dentry_ops;
+	dentry->d_fsdata = ub;	/* reference is dropped by bc_d_release() */
+	d_add(dentry, ino);
+	return NULL;
+
+out_put:
+	put_beancounter(ub);	/* the caller's reference is consumed on failure */
+	return ERR_PTR(-ENOENT);
+}
+
+/*
+ * files (bc_proc_entry) manipulations
+ */
+
+static struct dentry *bc_lookup_file(struct inode *dir,
+		struct dentry *dentry, struct bc_proc_entry *root,
+		int (*test)(struct inode *, void *),
+		int (*set)(struct inode *, void *))
+{
+	struct bc_proc_entry *pde;
+	struct inode *ino;
+
+	for (pde = root; pde != NULL; pde = pde->next)
+		if (strcmp(pde->name, dentry->d_name.name) == 0)
+			break;
+	if (pde == NULL)	/* no registered file of that name in @root */
+		return ERR_PTR(-ESRCH);
+
+	ino = iget5_locked(dir->i_sb, pde->cookie, test, set, pde);
+	if (ino == NULL)
+		return ERR_PTR(-ENOENT);
+
+	if (ino->i_state & I_NEW)	/* cached inodes come back unlocked */
+		unlock_new_inode(ino);
+	dentry->d_op = &bc_dentry_ops;
+	d_add(dentry, ino);
+	return NULL;
+}
+
+static int bc_file_open(struct inode *ino, struct file *filp)
+{
+	struct bc_proc_entry *de;
+	struct user_beancounter *ub;
+
+	de = (struct bc_proc_entry *)ino->u.generic_ip;
+	ub = (struct user_beancounter *)filp->f_dentry->d_parent->d_fsdata;
+	BUG_ON(ub->ub_magic != UB_MAGIC);	/* parent must be a bc directory */
+
+	/*
+	 * ub can't disappear: we hold d_parent, and it holds the beancounter
+	 */
+	return single_open(filp, de->u.show, ub);
+}
+
+static struct file_operations bc_file_ops = {	/* set by bc_lookset_entry() */
+	.open		= bc_file_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int bc_looktest_entry(struct inode *ino, void *data)
+{	/* iget5_locked() match: a bc_file_ops inode bound to @data */
+	return ino->i_fop == &bc_file_ops && ino->u.generic_ip == data;
+}
+
+static int bc_lookset_entry(struct inode *ino, void *data)
+{
+	struct bc_proc_entry *de;
+
+	de = (struct bc_proc_entry *)data;
+	ino->u.generic_ip = data;	/* compared again by bc_looktest_entry() */
+	ino->i_ino = bc_make_file_ino(de);
+	ino->i_fop = &bc_file_ops;
+	ino->i_mode = S_IFREG | S_IRUSR;
+	ino->i_nlink = 1;
+	ino->i_gid = 0;
+	ino->i_uid = 0;
+	return 0;
+}
+
+static inline struct dentry *bc_lookup_files(struct inode *dir,
+		struct dentry *de)
+{	/* look @de up among the per-beancounter files (bc_entries) */
+	return bc_lookup_file(dir, de, bc_entries,
+			bc_looktest_entry, bc_lookset_entry);
+}
+
+static int bc_looktest_root_entry(struct inode *ino, void *data)
+{
+	struct bc_proc_entry *de;
+
+	de = (struct bc_proc_entry *)data;	/* root entries bring their own fops */
+	return ino->i_fop == de->u.fops && ino->u.generic_ip == data;
+}
+
+static int bc_lookset_root_entry(struct inode *ino, void *data)
+{
+	struct bc_proc_entry *de;
+
+	de = (struct bc_proc_entry *)data;
+	ino->u.generic_ip = data;	/* compared again by bc_looktest_root_entry() */
+	ino->i_ino = bc_make_file_ino(de);
+	ino->i_fop = de->u.fops;	/* the entry's own file operations */
+	ino->i_mode = S_IFREG | S_IRUSR;
+	ino->i_nlink = 1;
+	ino->i_gid = 0;
+	ino->i_uid = 0;
+	return 0;
+}
+
+static inline struct dentry *bc_lookup_root_files(struct inode *dir,
+		struct dentry *de)
+{	/* look @de up among the root-level files (bc_root_entries) */
+	return bc_lookup_file(dir, de, bc_root_entries,
+			bc_looktest_root_entry, bc_lookset_root_entry);
+}
+
+/*
+ * /proc/bc/.../<id> directory operations
+ */
+
+static int bc_entry_readdir(struct file *file, void *data, filldir_t filler)
+{	/* the directory's own beancounter (d_fsdata) acts as the parent */
+	return bc_readdir(file, filler, data,
+			(struct user_beancounter *)file->f_dentry->d_fsdata);
+}
+
+static struct dentry *bc_entry_lookup(struct inode *dir, struct dentry *dentry,
+		struct nameidata *nd)
+{
+	int id;
+	char *end;
+	struct user_beancounter *par, *ub;
+	struct dentry *de;
+
+	if (!(capable(CAP_DAC_OVERRIDE) && capable(CAP_DAC_READ_SEARCH)))
+		return ERR_PTR(-EPERM);
+
+	de = bc_lookup_files(dir, dentry);
+	if (de != ERR_PTR(-ESRCH))	/* a registered file, or a real error */
+		return de;
+
+	id = simple_strtol(dentry->d_name.name, &end, 10);
+	if (*end != '.')	/* expected name: "<parent uid>.<sub id>" */
+		return ERR_PTR(-ENOENT);
+
+	par = (struct user_beancounter *)dir->u.generic_ip;
+	if (par->ub_uid != id)	/* the prefix must name this directory's bc */
+		return ERR_PTR(-ENOENT);
+
+	id = simple_strtol(end + 1, &end, 10);
+	if (*end != '\0')
+		return ERR_PTR(-ENOENT);
+
+	ub = get_subbeancounter_byid(par, id, 0);
+	if (ub == NULL)
+		return ERR_PTR(-ENOENT);
+
+	return bc_lookup(ub, dir, dentry);	/* bc_lookup() takes over the ub reference */
+}
+
+static struct file_operations bc_entry_fops = {	/* set by bc_lookset() */
+	.read = generic_read_dir,
+	.readdir = bc_entry_readdir,
+};
+
+static struct inode_operations bc_entry_iops = {	/* set by bc_lookset() */
+	.lookup = bc_entry_lookup,
+};
+
+/*
+ * /proc/bc directory operations
+ */
+
+static int bc_root_readdir(struct file *file, void *data, filldir_t filler)
+{
+	return bc_readdir(file, filler, data, NULL);	/* NULL parent: root level */
+}
+
+static struct dentry *bc_root_lookup(struct inode *dir, struct dentry *dentry,
+		struct nameidata *nd)
+{
+	int id;
+	char *end;
+	struct user_beancounter *ub;
+	struct dentry *de;
+
+	if (!(capable(CAP_DAC_OVERRIDE) && capable(CAP_DAC_READ_SEARCH)))
+		return ERR_PTR(-EPERM);
+
+	de = bc_lookup_root_files(dir, dentry);
+	if (de != ERR_PTR(-ESRCH))	/* a registered file, or a real error */
+		return de;
+
+	id = simple_strtol(dentry->d_name.name, &end, 10);
+	if (*end != '\0')	/* the whole name must be a decimal uid */
+		return ERR_PTR(-ENOENT);
+
+	ub = get_beancounter_byuid(id, 0);
+	if (ub == NULL)
+		return ERR_PTR(-ENOENT);
+
+	return bc_lookup(ub, dir, dentry);	/* bc_lookup() takes over the ub reference */
+}
+
+static struct file_operations bc_root_fops = {	/* installed by ub_init_proc() */
+	.read = generic_read_dir,
+	.readdir = bc_root_readdir,
+};
+
+static struct inode_operations bc_root_iops = {	/* installed by ub_init_proc() */
+	.lookup = bc_root_lookup,
+};
+
+static int __init ub_init_proc(void)
+{
+	struct proc_dir_entry *entry, *bc_proc_root;
+
+	bc_proc_root = create_proc_entry("bc",
+			S_IFDIR | S_IRUGO | S_IXUGO, NULL);
+	if (bc_proc_root == NULL)
+		panic("Can't create /proc/bc entry");
+	bc_proc_root->proc_fops = &bc_root_fops;
+	bc_proc_root->proc_iops = &bc_root_iops;
+
+	bc_register_proc_entry(&bc_resources_entry);
+#ifdef CONFIG_UBC_DEBUG
+	bc_register_proc_entry(&bc_debug_entry);
+#endif
+	bc_register_proc_root_entry(&bc_all_resources_entry);
+
+	entry = create_proc_glob_entry("user_beancounters", S_IRUGO, NULL);
+	if (entry == NULL)	/* same policy as for the "bc" directory */
+		panic("Can't create user_beancounters proc entry");
+	entry->proc_fops = &ub_file_operations;
+	return 0;
+}
+
+core_initcall(ub_init_proc);
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/ub_stat.c linux-2.6.16.46-0.12-027test011/kernel/ub/ub_stat.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/ub_stat.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/ub_stat.c	2007-08-28 17:35:30.000000000 +0400
@@ -0,0 +1,452 @@
+/*
+ *  kernel/ub/ub_stat.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/timer.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/suspend.h>
+
+#include <asm/uaccess.h>
+#include <asm/param.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_hash.h>
+#include <ub/ub_stat.h>
+
+static spinlock_t ubs_notify_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(ubs_notify_list);
+static long ubs_min_interval;
+static ubstattime_t ubs_start_time, ubs_end_time;
+static struct timer_list ubs_timer;
+
+/* Copy the uids of all parentless beancounters to user buf, page by page. */
+static int ubstat_get_list(void *buf, long size)
+{
+	int retval, len;
+	struct user_beancounter *ub, *ubp;
+	long *page, *ptr, *end;
+
+	page = (long *)__get_free_page(GFP_KERNEL);
+	if (page == NULL)
+		return -ENOMEM;
+
+	retval = 0;		/* bytes copied so far, or a negative errno */
+	ubp = NULL;		/* beancounter pinned while the lock is dropped */
+	ptr = page;
+	end = page + PAGE_SIZE / sizeof(*ptr);
+
+	spin_lock_irq(&ub_hash_lock);
+	for_each_beancounter(ub) {
+		if (ub->parent != NULL)
+			continue;
+		*ptr++ = ub->ub_uid;
+		if (ptr != end)
+			continue;	/* staging page not full yet */
+
+		get_beancounter(ub);	/* pin ub before dropping the lock */
+		spin_unlock_irq(&ub_hash_lock);
+
+		put_beancounter(ubp);	/* unpin the previously pinned one */
+		ubp = ub;
+
+		len = min_t(long, (ptr - page) * sizeof(*ptr), size);
+		if (copy_to_user(buf, page, len)) {
+			retval = -EFAULT;
+			goto out_put;
+		}
+		retval += len;
+		if (len < PAGE_SIZE)	/* user buffer exhausted */
+			goto out_put;
+		buf += len;
+		size -= len;
+
+		ptr = page;
+		end = page + PAGE_SIZE / sizeof(*ptr);
+
+		spin_lock_irq(&ub_hash_lock);
+	}
+	spin_unlock_irq(&ub_hash_lock);
+
+	/* ubp is dropped once, at out_put; a put here would be a double put */
+	size = min_t(long, (ptr - page) * sizeof(*ptr), size);
+	if (size > 0 && copy_to_user(buf, page, size)) {
+		retval = -EFAULT;
+		goto out_put;
+	}
+	retval += size;
+
+out_put:
+	put_beancounter(ubp);
+	free_page((unsigned long)page);
+	return retval;
+}
+
+static int ubstat_gettime(void *buf, long size)
+{
+	ubgettime_t data;	/* times in seconds: note the "* HZ" below */
+	int retval;
+
+	spin_lock(&ubs_notify_lock);	/* do_notifies() updates under it */
+	data.start_time = ubs_start_time;
+	data.end_time = ubs_end_time;
+	data.cur_time = ubs_start_time + (jiffies - ubs_start_time * HZ) / HZ;
+	spin_unlock(&ubs_notify_lock);
+
+	retval = min_t(long, sizeof(data), size);	/* honour short buffers */
+	if (copy_to_user(buf, &data, retval))
+		retval = -EFAULT;
+	return retval;		/* bytes copied, or -EFAULT */
+}
+
+static int ubstat_do_read_one(struct user_beancounter *ub, int res, void *kbuf)
+{
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparm_t	param[1];
+	} *data;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+
+	data->param[0].maxheld = ub->ub_store[res].maxheld;
+	data->param[0].failcnt = ub->ub_store[res].failcnt;
+
+	return sizeof(*data);
+}
+
+static int ubstat_do_read_all(struct user_beancounter *ub, void *kbuf, int size)
+{
+	int wrote;
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparm_t	param[UB_RESOURCES];
+	} *data;
+	int resource;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+	wrote = sizeof(data->start_time) + sizeof(data->end_time);
+
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		if (size < wrote + sizeof(data->param[resource]))
+			break;
+		data->param[resource].maxheld = ub->ub_store[resource].maxheld;
+		data->param[resource].failcnt = ub->ub_store[resource].failcnt;
+		wrote += sizeof(data->param[resource]);
+	}
+
+	return wrote;
+}
+
+static int ubstat_do_read_full(struct user_beancounter *ub, void *kbuf,
+		int size)
+{
+	int wrote;
+	struct {
+		ubstattime_t	start_time;
+		ubstattime_t	end_time;
+		ubstatparmf_t	param[UB_RESOURCES];
+	} *data;
+	int resource;
+
+	data = kbuf;
+	data->start_time = ubs_start_time;
+	data->end_time = ubs_end_time;
+	wrote = sizeof(data->start_time) + sizeof(data->end_time);
+
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		if (size < wrote + sizeof(data->param[resource]))
+			break;
+		/* The beginning of ubstatparmf_t matches struct ubparm. */
+		memcpy(&data->param[resource], &ub->ub_store[resource],
+				sizeof(ub->ub_store[resource]));
+		data->param[resource].__unused1 = 0;
+		data->param[resource].__unused2 = 0;
+		wrote += sizeof(data->param[resource]);
+	}
+	return wrote;
+}
+
+static int ubstat_get_stat(struct user_beancounter *ub, long cmd,
+		void *buf, long size)
+{
+	void *kbuf;
+	int retval;
+
+	kbuf = (void *)__get_free_page(GFP_KERNEL);
+	if (kbuf == NULL)
+		return -ENOMEM;
+
+	spin_lock(&ubs_notify_lock);
+	switch (UBSTAT_CMD(cmd)) {
+		case UBSTAT_READ_ONE:
+			retval = -EINVAL;
+			if (UBSTAT_PARMID(cmd) >= UB_RESOURCES)
+				break;
+			retval = ubstat_do_read_one(ub,
+					UBSTAT_PARMID(cmd), kbuf);
+			break;
+		case UBSTAT_READ_ALL:
+			retval = ubstat_do_read_all(ub, kbuf, PAGE_SIZE);
+			break;
+		case UBSTAT_READ_FULL:
+			retval = ubstat_do_read_full(ub, kbuf, PAGE_SIZE);
+			break;
+		default:
+			retval = -EINVAL;
+	}
+	spin_unlock(&ubs_notify_lock);
+
+	if (retval > 0) {
+		retval = min_t(long, retval, size);
+		if (copy_to_user(buf, kbuf, retval))
+			retval = -EFAULT;
+	}
+
+	free_page((unsigned long)kbuf);
+	return retval;
+}
+
+/* Add, replace or (signum == 0) remove current's end-of-interval signal. */
+static int ubstat_handle_notifrq(ubnotifrq_t *req)
+{
+	struct ub_stat_notify *new_notify;
+	struct list_head *entry;
+	struct task_struct *tsk_to_free;
+
+	/* validate before allocating, so an invalid request leaks nothing */
+	if (req->maxinterval < 1)
+		return -EINVAL;
+
+	new_notify = kmalloc(sizeof(*new_notify), GFP_KERNEL);
+	if (new_notify == NULL)
+		return -ENOMEM;
+
+	tsk_to_free = NULL;
+	INIT_LIST_HEAD(&new_notify->list);
+
+	spin_lock(&ubs_notify_lock);
+	list_for_each(entry, &ubs_notify_list) {
+		struct ub_stat_notify *notify;
+
+		notify = list_entry(entry, struct ub_stat_notify, list);
+		if (notify->task == current) {
+			kfree(new_notify);	/* reuse the existing entry */
+			new_notify = notify;
+			break;
+		}
+	}
+
+	if (req->maxinterval > TIME_MAX_SEC)
+		req->maxinterval = TIME_MAX_SEC;
+	if (req->maxinterval < ubs_min_interval) {
+		unsigned long dif;	/* seconds left until the timer fires */
+
+		ubs_min_interval = req->maxinterval;
+		dif = (ubs_timer.expires - jiffies + HZ - 1) / HZ;
+		if (dif > req->maxinterval)
+			mod_timer(&ubs_timer,
+					ubs_timer.expires -
+					(dif - req->maxinterval) * HZ);
+	}
+
+	if (entry != &ubs_notify_list) {	/* loop ended via break */
+		list_del(&new_notify->list);
+		tsk_to_free = new_notify->task;
+	}
+	if (req->signum) {
+		new_notify->task = current;
+		get_task_struct(new_notify->task);
+		new_notify->signum = req->signum;
+		list_add(&new_notify->list, &ubs_notify_list);
+	} else
+		kfree(new_notify);
+	spin_unlock(&ubs_notify_lock);
+
+	if (tsk_to_free != NULL)	/* drop the old ref outside the lock */
+		put_task_struct(tsk_to_free);
+	return 0;
+}
+
+/*
+ * former sys_ubstat
+ */
+long do_ubstat(int func, unsigned long arg1, unsigned long arg2, void *buf,
+		long size)
+{
+	int retval;
+	struct user_beancounter *ub;
+
+	if (func == UBSTAT_UBPARMNUM)
+		return UB_RESOURCES;
+	if (func == UBSTAT_UBLIST)
+		return ubstat_get_list(buf, size);
+	if (!(capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)))
+		return -EPERM;
+
+	if (func == UBSTAT_GETTIME) {
+		retval = ubstat_gettime(buf, size);
+		goto notify;
+	}
+
+	ub = get_exec_ub();
+	if (ub != NULL && ub->ub_uid == arg1)
+		get_beancounter(ub);
+	else /* FIXME must be if (ve_is_super) */
+		ub = get_beancounter_byuid(arg1, 0);
+
+	if (ub == NULL)
+		return -ESRCH;
+
+	retval = ubstat_get_stat(ub, func, buf, size);
+	put_beancounter(ub);
+notify:
+	/* Handle request for notification */
+	if (retval >= 0) {
+		ubnotifrq_t notifrq;
+		int err;
+
+		err = -EFAULT;
+		if (!copy_from_user(&notifrq, (void *)arg2, sizeof(notifrq)))
+			err = ubstat_handle_notifrq(&notifrq);
+		if (err)
+			retval = err;
+	}
+
+	return retval;
+}
+
+static void ubstat_save_onestat(struct user_beancounter *ub)
+{
+	int resource;
+
+	/* called with local irq disabled */
+	spin_lock(&ub->ub_lock);
+	for (resource = 0; resource < UB_RESOURCES; resource++) {
+		memcpy(&ub->ub_store[resource], &ub->ub_parms[resource],
+			sizeof(struct ubparm));
+		ub->ub_parms[resource].minheld =
+			ub->ub_parms[resource].maxheld =
+			ub->ub_parms[resource].held;
+	}
+	spin_unlock(&ub->ub_lock);
+}
+
+static void ubstat_save_statistics(void)
+{
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	local_irq_save(flags);
+	for_each_beancounter (ub)
+		ubstat_save_onestat(ub);
+	local_irq_restore(flags);
+}
+
+static void ubstatd_timeout(unsigned long __data)
+{
+	/*
+	 * Timer callback: __data carries a task_struct pointer; wake it up.
+	 */
+	wake_up_process((struct task_struct *)__data);
+}
+
+/*
+ * Safe wrapper for send_sig. It prevents a race with release_task
+ * for sighand.
+ * Should be called under tasklist_lock.
+ */
+static void task_send_sig(struct ub_stat_notify *notify)
+{
+	if (likely(notify->task->sighand != NULL))
+		send_sig(notify->signum, notify->task, 1);
+}
+
+static inline void do_notifies(void)
+{
+	LIST_HEAD(notif_free_list);
+	struct ub_stat_notify *notify;
+	struct ub_stat_notify *tmp;
+
+	spin_lock(&ubs_notify_lock);
+	ubs_start_time = ubs_end_time;
+	/*
+	 * the expression below relies on time being unsigned long and
+	 * arithmetic promotion rules
+	 */
+	ubs_end_time += (ubs_timer.expires - ubs_start_time * HZ) / HZ;
+	mod_timer(&ubs_timer, ubs_timer.expires + ubs_min_interval * HZ);
+	ubs_min_interval = TIME_MAX_SEC;
+	/* save statistics accumulated for the interval */
+	ubstat_save_statistics();
+	/* send signals */
+	read_lock(&tasklist_lock);
+	while (!list_empty(&ubs_notify_list)) {
+		notify = list_entry(ubs_notify_list.next,
+				struct ub_stat_notify, list);
+		task_send_sig(notify);
+		list_del(&notify->list);
+		list_add(&notify->list, &notif_free_list);
+	}
+	read_unlock(&tasklist_lock);
+	spin_unlock(&ubs_notify_lock);
+
+	list_for_each_entry_safe(notify, tmp, &notif_free_list, list) {
+		put_task_struct(notify->task);
+		kfree(notify);
+	}
+}
+
+/*
+ * Kernel thread
+ */
+static int ubstatd(void *unused)
+{
+	/* daemonize call will take care of signals */
+	daemonize("ubstatd");
+
+	ubs_timer.data = (unsigned long)current;
+	ubs_timer.function = ubstatd_timeout;
+	add_timer(&ubs_timer);
+
+	while (1) {
+		set_task_state(current, TASK_INTERRUPTIBLE);
+		if (time_after(ubs_timer.expires, jiffies)) {
+			schedule();
+			try_to_freeze();
+			continue;
+		}
+
+		__set_task_state(current, TASK_RUNNING);
+		do_notifies();
+	}
+	return 0;
+}
+
+static int __init ubstatd_init(void)
+{
+	init_timer(&ubs_timer);
+	ubs_timer.expires = TIME_MAX_JIF;
+	ubs_min_interval = TIME_MAX_SEC;
+	ubs_start_time = ubs_end_time = 0;
+
+	kernel_thread(ubstatd, NULL, 0);
+	return 0;
+}
+
+module_init(ubstatd_init);
diff -upr linux-2.6.16.46-0.12.orig/kernel/ub/ub_sys.c linux-2.6.16.46-0.12-027test011/kernel/ub/ub_sys.c
--- linux-2.6.16.46-0.12.orig/kernel/ub/ub_sys.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ub/ub_sys.c	2007-08-28 17:35:32.000000000 +0400
@@ -0,0 +1,174 @@
+/*
+ *  kernel/ub/ub_sys.c
+ *
+ *  Copyright (C) 2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/virtinfo.h>
+#include <linux/compat.h>
+#include <asm/uaccess.h>
+
+#include <ub/beancounter.h>
+
+/*
+ *	The (rather boring) getluid syscall
+ */
+asmlinkage long sys_getluid(void)
+{
+	struct user_beancounter *exec_ub = get_exec_ub();
+
+	/* get_exec_ub() gave us nothing: there is no uid to report */
+	if (!exec_ub)
+		return -EINVAL;
+
+	return exec_ub->ub_uid;
+}
+
+/*
+ *	The setluid syscall
+ */
+asmlinkage long sys_setluid(uid_t uid)
+{
+	struct user_beancounter *ub;
+	struct task_beancounter *task_bc;
+	int error;
+
+	task_bc = &current->task_bc;
+
+	/* You may not disown a setluid */
+	error = -EINVAL;
+	if (uid == (uid_t)-1)
+		goto out;
+
+	/* You may only set an ub as root */
+	error = -EPERM;
+	if (!capable(CAP_SETUID))
+		goto out;
+	/*
+	 * The ub once set is irrevocable to all
+	 * unless it's set from ve0.
+	 */
+	if (!ve_is_super(get_exec_env()))
+		goto out;
+
+	/* Ok - set up a beancounter entry for this user */
+	error = -ENOBUFS;
+	ub = get_beancounter_byuid(uid, 1);
+	if (ub == NULL)
+		goto out;
+
+	ub_debug(UBD_ALLOC | UBD_LIMIT, "setluid, bean %p (count %d) "
+			"for %.20s pid %d\n",
+			ub, atomic_read(&ub->ub_refcount),
+			current->comm, current->pid);
+	/* install bc */
+	error = virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_NEWUBC, ub);
+	if (!(error & NOTIFY_FAIL)) {
+		put_beancounter(task_bc->exec_ub);
+		task_bc->exec_ub = ub;
+		if (!(error & NOTIFY_OK)) {
+			put_beancounter(task_bc->fork_sub);
+			task_bc->fork_sub = get_beancounter(ub);
+		}
+		error = 0;
+	} else {
+		put_beancounter(ub);
+		error = -ENOBUFS;
+	}
+out:
+	return error;
+}
+
+long do_setublimit(uid_t uid, unsigned long resource,
+		unsigned long *new_limits)
+{
+	int error;
+	unsigned long flags;
+	struct user_beancounter *ub;
+
+	error = -EPERM;
+	if(!capable(CAP_SYS_RESOURCE))
+		goto out;
+
+	if (!ve_is_super(get_exec_env()))
+		goto out;
+
+	error = -EINVAL;
+	if (resource >= UB_RESOURCES)
+		goto out;
+
+	error = -EINVAL;
+	if (new_limits[0] > UB_MAXVALUE || new_limits[1] > UB_MAXVALUE)
+		goto out;
+
+	error = -ENOENT;
+	ub = get_beancounter_byuid(uid, 0);
+	if (ub == NULL) {
+		ub_debug(UBD_LIMIT, "No login bc for uid %d\n", uid);
+		goto out;
+	}
+
+	spin_lock_irqsave(&ub->ub_lock, flags);
+	ub->ub_parms[resource].barrier = new_limits[0];
+	ub->ub_parms[resource].limit = new_limits[1];
+	spin_unlock_irqrestore(&ub->ub_lock, flags);
+
+	put_beancounter(ub);
+
+	error = 0;
+out:
+	return error;
+}
+
+/*
+ *	The setbeanlimit syscall
+ */
+asmlinkage long sys_setublimit(uid_t uid, unsigned long resource,
+		unsigned long __user *limits)
+{
+	unsigned long new_limits[2];
+
+	if (copy_from_user(&new_limits, limits, sizeof(new_limits)))
+		return -EFAULT;
+
+	return do_setublimit(uid, resource, new_limits);
+}
+
+extern long do_ubstat(int func, unsigned long arg1, unsigned long arg2,
+		void *buf, long size);
+asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2,
+		void *buf, long size)
+{
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
+	return do_ubstat(func, arg1, arg2, buf, size);
+}
+
+#ifdef CONFIG_COMPAT
+asmlinkage long compat_sys_setublimit(uid_t uid, int resource,
+		unsigned int __user *limits)
+{
+	unsigned int narrow[2];
+	unsigned long wide[2];
+
+	if (copy_from_user(narrow, limits, sizeof(narrow)) != 0)
+		return -EFAULT;
+	/* widen the 32-bit barrier/limit pair to native unsigned long */
+	wide[0] = narrow[0];
+	wide[1] = narrow[1];
+
+	return do_setublimit(uid, resource, wide);
+}
+
+asmlinkage long compat_sys_ubstat(int func, unsigned int arg1,
+		unsigned int arg2, compat_uptr_t *buf, long size)
+{
+	return sys_ubstat(func, arg1, arg2, buf, size);
+}
+#endif
diff -upr linux-2.6.16.46-0.12.orig/kernel/user.c linux-2.6.16.46-0.12-027test011/kernel/user.c
--- linux-2.6.16.46-0.12.orig/kernel/user.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/user.c	2007-08-28 17:35:33.000000000 +0400
@@ -14,6 +14,7 @@
 #include <linux/bitops.h>
 #include <linux/key.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 
 /*
  * UID task count cache, to get fast user lookup in "alloc_uid"
@@ -24,7 +25,20 @@
 #define UIDHASH_SZ		(1 << UIDHASH_BITS)
 #define UIDHASH_MASK		(UIDHASH_SZ - 1)
 #define __uidhashfn(uid)	(((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
-#define uidhashentry(uid)	(uidhash_table + __uidhashfn((uid)))
+#define __uidhashentry(uid)	(uidhash_table + __uidhashfn((uid)))
+
+#ifdef CONFIG_VE
+#define UIDHASH_MASK_VE			(UIDHASH_SZ_VE - 1)
+#define __uidhashfn_ve(uid)		(((uid >> UIDHASH_BITS_VE) ^ uid) & \
+						UIDHASH_MASK_VE)
+#define __uidhashentry_ve(uid, envid)	((envid)->uidhash_table + \
+						__uidhashfn_ve(uid))
+#define uidhashentry_ve(uid)		(ve_is_super(get_exec_env()) ?	\
+						__uidhashentry(uid) :	\
+						__uidhashentry_ve(uid, get_exec_env()))
+#else
+#define uidhashentry_ve(uid)		__uidhashentry(uid)
+#endif
 
 static kmem_cache_t *uid_cachep;
 static struct list_head uidhash_table[UIDHASH_SZ];
@@ -96,7 +110,7 @@ struct user_struct *find_user(uid_t uid)
 	unsigned long flags;
 
 	spin_lock_irqsave(&uidhash_lock, flags);
-	ret = uid_hash_find(uid, uidhashentry(uid));
+	ret = uid_hash_find(uid, uidhashentry_ve(uid));
 	spin_unlock_irqrestore(&uidhash_lock, flags);
 	return ret;
 }
@@ -115,10 +129,11 @@ void free_uid(struct user_struct *up)
 	}
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(free_uid);
 
 struct user_struct * alloc_uid(uid_t uid)
 {
-	struct list_head *hashent = uidhashentry(uid);
+	struct list_head *hashent = uidhashentry_ve(uid);
 	struct user_struct *up;
 
 	spin_lock_irq(&uidhash_lock);
@@ -168,6 +183,7 @@ struct user_struct * alloc_uid(uid_t uid
 	}
 	return up;
 }
+EXPORT_SYMBOL_GPL(alloc_uid);
 
 void switch_uid(struct user_struct *new_user)
 {
@@ -186,21 +202,21 @@ void switch_uid(struct user_struct *new_
 	free_uid(old_user);
 	suid_keys(current);
 }
-
+EXPORT_SYMBOL_GPL(switch_uid);
 
 static int __init uid_cache_init(void)
 {
 	int n;
 
 	uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
 
 	for(n = 0; n < UIDHASH_SZ; ++n)
 		INIT_LIST_HEAD(uidhash_table + n);
 
 	/* Insert the root user immediately (init already runs as root) */
 	spin_lock_irq(&uidhash_lock);
-	uid_hash_insert(&root_user, uidhashentry(0));
+	uid_hash_insert(&root_user, __uidhashentry(0));
 	spin_unlock_irq(&uidhash_lock);
 
 	return 0;
diff -upr linux-2.6.16.46-0.12.orig/kernel/ve/Makefile linux-2.6.16.46-0.12-027test011/kernel/ve/Makefile
--- linux-2.6.16.46-0.12.orig/kernel/ve/Makefile	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ve/Makefile	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,16 @@
+#
+#
+#  kernel/ve/Makefile
+#
+#  Copyright (C) 2000-2005  SWsoft
+#  All rights reserved.
+#
+#  Licensing governed by "linux/COPYING.SWsoft" file.
+
+obj-y = ve.o veowner.o hooks.o devperms.o
+
+obj-$(CONFIG_VZ_DEV) += vzdev.o
+obj-$(CONFIG_VZ_WDOG) += vzwdog.o
+obj-$(CONFIG_VE_CALLS) += vzmon.o
+
+vzmon-objs = vecalls.o
diff -upr linux-2.6.16.46-0.12.orig/kernel/ve/devperms.c linux-2.6.16.46-0.12-027test011/kernel/ve/devperms.c
--- linux-2.6.16.46-0.12.orig/kernel/ve/devperms.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ve/devperms.c	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,418 @@
+/*
+ *  linux/kernel/ve/devperms.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ * Devices permissions routines,
+ * character and block devices separately
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <linux/ve.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <linux/vzcalluser.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+
+/*
+ * Rules applied in the following order:
+ *  MAJOR!=0, MINOR!=0
+ *  MAJOR!=0, MINOR==0
+ *  MAJOR==0, MINOR==0
+ */
+
+struct devperms_struct {
+	dev_t   	dev;	/* device id */
+	unsigned char	mask;
+	unsigned 	type;
+	envid_t	 	veid;
+
+	struct hlist_node	hash;
+	struct rcu_head		rcu;
+};
+
+static struct devperms_struct default_major_perms[] = {
+	{
+		MKDEV(UNIX98_PTY_MASTER_MAJOR, 0),
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(UNIX98_PTY_SLAVE_MAJOR, 0),
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(PTY_MASTER_MAJOR, 0),
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(PTY_SLAVE_MAJOR, 0),
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+};
+
+static struct devperms_struct default_minor_perms[] = {
+	{
+		MKDEV(MEM_MAJOR, 3),	/* null */
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(MEM_MAJOR, 5),	/* zero */
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(MEM_MAJOR, 7),	/* full */
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(TTYAUX_MAJOR, 0),	/* tty */
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(TTYAUX_MAJOR, 2),	/* ptmx */
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(MEM_MAJOR, 8),	/* random */
+		S_IROTH,
+		S_IFCHR,
+	},
+	{
+		MKDEV(MEM_MAJOR, 9),	/* urandom */
+		S_IROTH,
+		S_IFCHR
+	},
+};
+
+static struct devperms_struct default_deny_perms = {
+	MKDEV(0, 0),
+	0,
+	S_IFCHR,
+};
+
+static inline struct devperms_struct *find_default_devperms(int type, dev_t dev)
+{
+	int i;
+
+	/* XXX all defaults perms are S_IFCHR */
+	if (type != S_IFCHR)
+		return &default_deny_perms;
+
+	for (i = 0; i < ARRAY_SIZE(default_minor_perms); i++)
+		if (MAJOR(dev) == MAJOR(default_minor_perms[i].dev) &&
+				MINOR(dev) == MINOR(default_minor_perms[i].dev))
+			return &default_minor_perms[i];
+
+	for (i = 0; i < ARRAY_SIZE(default_major_perms); i++)
+		if (MAJOR(dev) == MAJOR(default_major_perms[i].dev))
+			return &default_major_perms[i];
+
+	return &default_deny_perms;
+}
+
+#define DEVPERMS_HASH_SZ 512
+#define devperms_hashfn(id, dev) /* bucket index for a (veid, dev) pair */ \
+	((((id) << 5) ^ ((id) >> 5) ^ MAJOR(dev) ^ MINOR(dev)) & \
+						(DEVPERMS_HASH_SZ - 1))
+
+static DEFINE_SPINLOCK(devperms_hash_lock);
+static struct hlist_head devperms_hash[DEVPERMS_HASH_SZ];
+
+static inline struct devperms_struct *find_devperms(envid_t veid,
+						    int type,
+						    dev_t dev)
+{
+	struct hlist_head *table;
+	struct devperms_struct *perms;
+	struct hlist_node *h;
+
+	table = &devperms_hash[devperms_hashfn(veid, dev)];
+	hlist_for_each_entry_rcu (perms, h, table, hash)
+		if (perms->type == type && perms->veid == veid &&
+				MAJOR(perms->dev) == MAJOR(dev) &&
+				MINOR(perms->dev) == MINOR(dev))
+			return perms;
+
+	return NULL;
+}
+
+static void free_devperms(struct rcu_head *rcu)
+{
+	struct devperms_struct *perms;
+
+	perms = container_of(rcu, struct devperms_struct, rcu);
+	kfree(perms);
+}
+
+/* API calls */
+
+void clean_device_perms_ve(envid_t veid)
+{
+	int i;
+	struct devperms_struct *p;
+	struct hlist_node *n, *tmp;
+
+	spin_lock(&devperms_hash_lock);
+	for (i = 0; i < DEVPERMS_HASH_SZ; i++)
+		hlist_for_each_entry_safe (p, n, tmp, &devperms_hash[i], hash)
+			if (p->veid == veid) {
+				hlist_del_rcu(&p->hash);
+				call_rcu(&p->rcu, free_devperms);
+			}
+	spin_unlock(&devperms_hash_lock);
+}
+
+EXPORT_SYMBOL(clean_device_perms_ve);
+
+/*
+ * Mode is a mask of
+ *	FMODE_READ	for read access (configurable by S_IROTH)
+ *	FMODE_WRITE	for write access (configurable by S_IWOTH)
+ *	FMODE_QUOTACTL	for quotactl access (configurable by S_IXGRP)
+ */
+
+int get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
+{
+	struct devperms_struct *p;
+	struct ve_struct *ve;
+	envid_t veid;
+	char mask;
+
+	ve = get_exec_env();
+	veid = ve->veid;
+	rcu_read_lock();	/* hash entries are freed via call_rcu() */
+
+	p = find_devperms(veid, dev_type | VE_USE_MINOR, dev);
+	if (p != NULL)
+		goto end;	/* exact major:minor rule */
+
+	p = find_devperms(veid, dev_type | VE_USE_MAJOR, MKDEV(MAJOR(dev),0));
+	if (p != NULL)
+		goto end;	/* rule for the whole major */
+
+	p = find_devperms(veid, dev_type, MKDEV(0,0));
+	if (p != NULL)
+		goto end;	/* per-VE rule stored under device 0:0 */
+
+	if (ve->features & VE_FEATURE_DEF_PERMS) {
+		p = find_default_devperms(dev_type, dev);
+		if (p != NULL)	/* always true: it falls back to deny */
+			goto end;
+	}
+
+	rcu_read_unlock();
+	return -ENODEV;
+
+end:
+	mask = p->mask;		/* copy out before leaving the RCU section */
+	rcu_read_unlock();	/* below: index bits 1|2|4 -> 004|002|010 */
+
+	access_mode = "\000\004\002\006\010\014\012\016"[access_mode];
+	return ((mask & access_mode) == access_mode) ? 0 : -EACCES;
+}
+
+EXPORT_SYMBOL(get_device_perms_ve);
+
+int set_device_perms_ve(envid_t veid, unsigned type, dev_t dev, unsigned mask)
+{
+	struct devperms_struct *perms, *new_perms;
+
+	/* canonicalize dev first: lookup and insertion must use one key */
+	switch (type & VE_USE_MASK) {
+	case 0:
+		dev = 0;
+		break;
+	case VE_USE_MAJOR:
+		dev = MKDEV(MAJOR(dev),0);
+		break;
+	}
+
+	new_perms = kmalloc(sizeof(struct devperms_struct), GFP_KERNEL);
+	if (new_perms == NULL)	/* was dereferenced unchecked on insert */
+		return -ENOMEM;
+
+	spin_lock(&devperms_hash_lock);
+	perms = find_devperms(veid, type, dev);
+	if (perms != NULL) {
+		kfree(new_perms);	/* rule exists: update it in place */
+		perms->mask = mask & S_IALLUGO;
+	} else {
+		new_perms->veid = veid;
+		new_perms->dev = dev;
+		new_perms->type = type;
+		new_perms->mask = mask & S_IALLUGO;
+		hlist_add_head_rcu(&new_perms->hash,
+				&devperms_hash[devperms_hashfn(veid, dev)]);
+	}
+	spin_unlock(&devperms_hash_lock);
+	return 0;
+}
+
+EXPORT_SYMBOL(set_device_perms_ve);
+
+#ifdef CONFIG_PROC_FS
+static int devperms_seq_show(struct seq_file *m, void *v)
+{
+	struct devperms_struct *dp;
+	char dev_s[32], type_c;
+	unsigned use, type;
+	dev_t dev;
+
+	dp = (struct devperms_struct *)v;
+	if (dp == (struct devperms_struct *)1L) {
+		seq_printf(m, "Version: 2.7\n");
+		return 0;
+	}
+
+	use = dp->type & VE_USE_MASK;
+	type = dp->type & S_IFMT;
+	dev = dp->dev;
+
+	if ((use | VE_USE_MINOR) == use)
+		snprintf(dev_s, sizeof(dev_s), "%d:%d", MAJOR(dev), MINOR(dev));
+	else if ((use | VE_USE_MAJOR) == use)
+		snprintf(dev_s, sizeof(dev_s), "%d:*", MAJOR(dp->dev));
+	else
+		snprintf(dev_s, sizeof(dev_s), "*:*");
+
+	if (type == S_IFCHR)
+		type_c = 'c';
+	else if (type == S_IFBLK)
+		type_c = 'b';
+	else
+		type_c = '?';
+
+	seq_printf(m, "%10u %c %03o %s\n", dp->veid, type_c, dp->mask, dev_s);
+	return 0;
+}
+
+static void *devperms_seq_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t cpos;
+	long slot;
+	struct devperms_struct *dp;
+	struct hlist_node *h;
+
+	cpos = *pos;
+	rcu_read_lock();
+
+	if (cpos-- == 0)
+		return (void *)1L;
+
+	for (slot = 0; slot < DEVPERMS_HASH_SZ; slot++)
+		hlist_for_each_entry_rcu (dp, h, &devperms_hash[slot], hash)
+			if (cpos-- == 0) {
+				m->private = (void *)slot;
+				return dp;
+			}
+	return NULL;
+}
+
+static void *devperms_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	long slot;
+	struct hlist_node *next;
+	struct devperms_struct *dp;
+
+	dp = (struct devperms_struct *)v;
+
+	if (unlikely(dp == (struct devperms_struct *)1L))
+		slot = 0;
+	else {
+		next = rcu_dereference(dp->hash.next);
+		if (next != NULL)
+			goto out;
+
+		slot = (long)m->private + 1;
+	}
+
+	for (; slot < DEVPERMS_HASH_SZ; slot++) {
+		next = rcu_dereference(devperms_hash[slot].first);
+		if (next == NULL)
+			continue;
+
+		m->private = (void *)slot;
+		goto out;
+	}
+	return NULL;
+
+out:
+	(*pos)++;
+	return hlist_entry(next, struct devperms_struct, hash);
+}
+
+static void devperms_seq_stop(struct seq_file *m, void *v)
+{
+	rcu_read_unlock();
+}
+
+static struct seq_operations devperms_seq_op = {
+	.start	= devperms_seq_start,
+	.next	= devperms_seq_next,
+	.stop	= devperms_seq_stop,
+	.show	= devperms_seq_show,
+};
+
+static int devperms_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &devperms_seq_op);
+}
+
+struct file_operations proc_devperms_ops = {
+	.open		= devperms_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+EXPORT_SYMBOL(proc_devperms_ops);
+#endif
+
+/* Initialisation */
+
+static struct devperms_struct original_perms[] =
+{
+	{
+		MKDEV(0,0),
+		S_IROTH | S_IWOTH,
+		S_IFCHR,
+		0,
+	},
+	{
+		MKDEV(0,0),
+		S_IXGRP | S_IROTH | S_IWOTH,
+		S_IFBLK,
+		0,
+	},
+};
+
+static int __init init_devperms_hash(void)
+{
+	hlist_add_head(&original_perms[0].hash,
+			&devperms_hash[devperms_hashfn(0,
+				original_perms[0].dev)]);
+	hlist_add_head(&original_perms[1].hash,
+			&devperms_hash[devperms_hashfn(0,
+				original_perms[1].dev)]);
+	return 0;
+}
+
+core_initcall(init_devperms_hash);
diff -upr linux-2.6.16.46-0.12.orig/kernel/ve/hooks.c linux-2.6.16.46-0.12-027test011/kernel/ve/hooks.c
--- linux-2.6.16.46-0.12.orig/kernel/ve/hooks.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ve/hooks.c	2007-08-28 17:35:31.000000000 +0400
@@ -0,0 +1,114 @@
+/*
+ *  linux/kernel/ve/hooks.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *  
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/ve.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/ve_proto.h>
+#include <linux/module.h>
+
+static struct list_head ve_hooks[VE_MAX_CHAINS];
+static DECLARE_RWSEM(ve_hook_sem);
+
+void ve_hook_register(int chain, struct ve_hook *vh)
+{
+	struct list_head *lh;
+	struct ve_hook *tmp;
+
+	/* ve_hooks[] has VE_MAX_CHAINS slots: valid indices stop one short */
+	BUG_ON(chain < 0 || chain >= VE_MAX_CHAINS);
+	down_write(&ve_hook_sem);
+	list_for_each(lh, &ve_hooks[chain]) {
+		tmp = list_entry(lh, struct ve_hook, list);
+		if (vh->priority < tmp->priority)
+			break;
+	}
+	/* lh is the first hook with a higher priority, or the list head */
+	list_add_tail(&vh->list, lh);
+	up_write(&ve_hook_sem);
+}
+
+EXPORT_SYMBOL(ve_hook_register);
+
+void ve_hook_unregister(struct ve_hook *vh)
+{
+	down_write(&ve_hook_sem);
+	list_del(&vh->list);
+	up_write(&ve_hook_sem);
+}
+
+EXPORT_SYMBOL(ve_hook_unregister);
+
+static inline int ve_hook_init(struct ve_hook *vh, struct ve_struct *ve)
+{
+	int err;
+
+	err = 0;
+	if (try_module_get(vh->owner)) {
+		err = vh->init(ve);
+		module_put(vh->owner);
+	}
+	return err;
+}
+
+static inline void ve_hook_fini(struct ve_hook *vh, struct ve_struct *ve)
+{
+	if (vh->fini != NULL && try_module_get(vh->owner)) {
+		vh->fini(ve);
+		module_put(vh->owner);
+	}
+}
+
+int ve_hook_iterate_init(int chain, void *ve)
+{
+	struct ve_hook *vh;
+	int err;
+
+	err = 0;
+
+	down_read(&ve_hook_sem);
+	list_for_each_entry(vh, &ve_hooks[chain], list)
+		if ((err = ve_hook_init(vh, ve)) < 0)
+			break;		/* stop at the first negative return */
+
+	if (err)	/* NOTE(review): also taken when the last err is > 0 */
+		list_for_each_entry_continue_reverse(vh, &ve_hooks[chain], list)
+			ve_hook_fini(vh, ve);	/* unwind hooks before vh */
+
+	up_read(&ve_hook_sem);
+	return err;
+}
+
+EXPORT_SYMBOL(ve_hook_iterate_init);
+
+void ve_hook_iterate_fini(int chain, void *ve)
+{
+	struct ve_hook *vh;
+
+	down_read(&ve_hook_sem);
+	list_for_each_entry_reverse(vh, &ve_hooks[chain], list)
+		ve_hook_fini(vh, ve);
+	up_read(&ve_hook_sem);
+}
+
+EXPORT_SYMBOL(ve_hook_iterate_fini);
+
+static int __init ve_hooks_init(void)
+{
+	int i;
+
+	for (i = 0; i < VE_MAX_CHAINS; i++)
+		INIT_LIST_HEAD(&ve_hooks[i]);
+	return 0;
+}
+
+core_initcall(ve_hooks_init);
+
diff -upr linux-2.6.16.46-0.12.orig/kernel/ve/ve.c linux-2.6.16.46-0.12-027test011/kernel/ve/ve.c
--- linux-2.6.16.46-0.12.orig/kernel/ve/ve.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ve/ve.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,171 @@
+/*
+ *  linux/kernel/ve.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+/*
+ * 've.c' helper file performing VE sub-system initialization
+ */
+
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/capability.h>
+#include <linux/ve.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/sys.h>
+#include <linux/kdev_t.h>
+#include <linux/termios.h>
+#include <linux/tty_driver.h>
+#include <linux/netdevice.h>
+#include <linux/utsname.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <linux/ve_proto.h>
+#include <linux/devpts_fs.h>
+
+#include <linux/nfcalls.h>
+
+unsigned long vz_rstamp = 0x37e0f59d;
+EXPORT_SYMBOL(vz_rstamp);
+/* NOTE(review): no_module is created in the GOING state, presumably so that it can never be pinned -- confirm */
+#ifdef CONFIG_MODULES
+struct module no_module = { .state = MODULE_STATE_GOING };
+EXPORT_SYMBOL(no_module);
+#endif
+
+INIT_KSYM_MODULE(ip_tables);
+INIT_KSYM_MODULE(ip6_tables);
+INIT_KSYM_MODULE(iptable_filter);
+INIT_KSYM_MODULE(ip6table_filter);
+INIT_KSYM_MODULE(iptable_mangle);
+INIT_KSYM_MODULE(ip6table_mangle);
+INIT_KSYM_MODULE(ip6t_multiport);
+INIT_KSYM_MODULE(ip6t_REJECT);
+INIT_KSYM_MODULE(ip_conntrack);
+INIT_KSYM_MODULE(ip_conntrack_ftp);
+INIT_KSYM_MODULE(ip_conntrack_irc);
+INIT_KSYM_MODULE(ip_nat);
+INIT_KSYM_MODULE(iptable_nat);
+INIT_KSYM_MODULE(ip_nat_ftp);
+INIT_KSYM_MODULE(ip_nat_irc);
+/* INIT_KSYM_CALL entries: the int-returning init calls come first, then the void fini/cleanup calls. */
+INIT_KSYM_CALL(int, init_netfilter, (void));
+INIT_KSYM_CALL(int, init_iptables, (void));
+INIT_KSYM_CALL(int, init_ip6tables, (void));
+INIT_KSYM_CALL(int, init_iptable_filter, (void));
+INIT_KSYM_CALL(int, init_ip6table_filter, (void));
+INIT_KSYM_CALL(int, init_iptable_mangle, (void));
+INIT_KSYM_CALL(int, init_ip6table_mangle, (void));
+INIT_KSYM_CALL(int, init_ip6table_multiport, (void));
+INIT_KSYM_CALL(int, init_ip6table_REJECT, (void));
+INIT_KSYM_CALL(int, init_iptable_conntrack, (void));
+INIT_KSYM_CALL(int, init_iptable_ftp, (void));
+INIT_KSYM_CALL(int, init_iptable_irc, (void));
+INIT_KSYM_CALL(int, ip_nat_init, (void));
+INIT_KSYM_CALL(int, init_iptable_nat, (void));
+INIT_KSYM_CALL(int, init_iptable_nat_ftp, (void));
+INIT_KSYM_CALL(int, init_iptable_nat_irc, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat_irc, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
+INIT_KSYM_CALL(void, fini_iptable_nat, (void));
+INIT_KSYM_CALL(void, ip_nat_cleanup, (void));
+INIT_KSYM_CALL(void, fini_iptable_irc, (void));
+INIT_KSYM_CALL(void, fini_iptable_ftp, (void));
+INIT_KSYM_CALL(void, fini_iptable_conntrack, (void));
+INIT_KSYM_CALL(void, fini_iptable_filter, (void));
+INIT_KSYM_CALL(void, fini_ip6table_filter, (void));
+INIT_KSYM_CALL(void, fini_iptable_mangle, (void));
+INIT_KSYM_CALL(void, fini_ip6table_mangle, (void));
+INIT_KSYM_CALL(void, fini_ip6table_multiport, (void));
+INIT_KSYM_CALL(void, fini_ip6table_REJECT, (void));
+INIT_KSYM_CALL(void, fini_ip6tables, (void));
+INIT_KSYM_CALL(void, fini_iptables, (void));
+INIT_KSYM_CALL(void, fini_netfilter, (void));
+
+#if defined(CONFIG_VE_CALLS_MODULE) || defined(CONFIG_VE_CALLS)
+INIT_KSYM_MODULE(vzmon);
+INIT_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
+/* Hands @env to real_do_env_free through KSYMSAFECALL_VOID on the vzmon module. */
+void do_env_free(struct ve_struct *env)
+{
+	KSYMSAFECALL_VOID(vzmon, real_do_env_free, (env));
+}
+EXPORT_SYMBOL(do_env_free);
+#endif
+
+#if defined(CONFIG_VE_ETHDEV) || defined(CONFIG_VE_ETHDEV_MODULE)
+INIT_KSYM_MODULE(vzethdev);
+INIT_KSYM_CALL(int, veth_open, (struct net_device *dev));
+#endif	/* CONFIG_VE_ETHDEV || CONFIG_VE_ETHDEV_MODULE */
+
+struct ve_struct ve0 = {	/* statically allocated VE; NOTE(review): presumably the object get_ve0() returns -- confirm */
+	.ve_list		= LIST_HEAD_INIT(ve0.ve_list),
+	.utsname		= &system_utsname,	/* points at the system utsname, not a private copy */
+	.vetask_lh		= LIST_HEAD_INIT(ve0.vetask_lh),
+	.start_jiffies		= INITIAL_JIFFIES,
+#ifdef CONFIG_NET
+	._net_dev_tail		= &ve0._net_dev_base,
+	.ifindex		= -1,
+#endif	/* CONFIG_NET */
+#ifdef CONFIG_UNIX98_PTYS
+	.devpts_config		= &devpts_config,
+#endif	/* CONFIG_UNIX98_PTYS */
+	.is_running		= 1,
+};
+
+EXPORT_SYMBOL(ve0);
+
+#ifdef CONFIG_SMP
+static struct percpu_data ve0_cpu_stats;
+#endif
+static struct ve_cpu_stats ve0_cpu_stats_data[NR_CPUS];	/* passed to static_percpu_ptr() in init_ve0() */
+/* The list of VEs; ve_list_add()/ve_list_del() modify it under ve_list_lock. */
+LIST_HEAD(ve_list_head);
+rwlock_t ve_list_lock = RW_LOCK_UNLOCKED;
+/* VEs queued by ve_cleanup_schedule(), under ve_cleanup_lock, before ve_cleanup_thread is woken. */
+LIST_HEAD(ve_cleanup_list);
+DEFINE_SPINLOCK(ve_cleanup_lock);
+struct task_struct *ve_cleanup_thread;
+
+EXPORT_SYMBOL(ve_list_lock);
+EXPORT_SYMBOL(ve_list_head);
+EXPORT_SYMBOL(ve_cleanup_lock);
+EXPORT_SYMBOL(ve_cleanup_list);
+EXPORT_SYMBOL(ve_cleanup_thread);
+
+/* Runtime setup of the VE returned by get_ve0(): reference, pcounter, per-cpu stats, VE list. */
+void init_ve0(void)
+{
+	struct ve_struct *env = get_ve0();
+
+	(void)get_ve(env);
+	atomic_set(&env->pcounter, 1);
+	env->cpu_stats = static_percpu_ptr(&ve0_cpu_stats,
+			ve0_cpu_stats_data);
+
+	/* ve_list_lock is not taken around this insertion */
+	list_add(&env->ve_list, &ve_list_head);
+}
+
+void ve_cleanup_schedule(struct ve_struct *ve)
+{
+	BUG_ON(ve_cleanup_thread == NULL);
+	/* Queue @ve at the tail of ve_cleanup_list, then wake ve_cleanup_thread. */
+	spin_lock(&ve_cleanup_lock);
+	list_add_tail(&ve->cleanup_list, &ve_cleanup_list);
+	spin_unlock(&ve_cleanup_lock);
+
+	wake_up_process(ve_cleanup_thread);
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/ve/vecalls.c linux-2.6.16.46-0.12-027test011/kernel/ve/vecalls.c
--- linux-2.6.16.46-0.12.orig/kernel/ve/vecalls.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ve/vecalls.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,2960 @@
+/*
+ *  linux/kernel/vecalls.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ */
+
+/*
+ * 'vecalls.c' is the file with basic VE support. It provides basic primitives
+ * along with the initialization script
+ */
+
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/capability.h>
+#include <linux/ve.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sys.h>
+#include <linux/fs.h>
+#include <linux/namespace.h>
+#include <linux/termios.h>
+#include <linux/tty_driver.h>
+#include <linux/netdevice.h>
+#include <linux/wait.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/utsname.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/devpts_fs.h>
+#include <linux/shmem_fs.h>
+#include <linux/sysfs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/suspend.h>
+#include <linux/rcupdate.h>
+#include <linux/in.h>
+#include <linux/idr.h>
+#include <linux/inetdevice.h>
+#include <linux/pid.h>
+#include <net/pkt_sched.h>
+#include <linux/divert.h>
+#include <ub/beancounter.h>
+#include <linux/kobject.h>
+
+#include <net/route.h>
+#include <net/ip_fib.h>
+#include <net/ip6_route.h>
+#include <net/arp.h>
+#include <net/ipv6.h>
+
+#include <linux/ve_proto.h>
+#include <linux/venet.h>
+#include <linux/vzctl.h>
+#include <linux/vzcalluser.h>
+#ifdef CONFIG_FAIRSCHED
+#include <linux/fairsched.h>
+#endif
+
+#include <linux/nfcalls.h>
+#include <linux/virtinfo.h>
+
+int nr_ve = 1;	/* One VE always exists. Compatibility with vestat. Updated by ve_list_add()/ve_list_del(). */
+EXPORT_SYMBOL(nr_ve);
+
+static int	do_env_enter(struct ve_struct *ve, unsigned int flags);
+static int	alloc_ve_tty_drivers(struct ve_struct* ve);
+static void	free_ve_tty_drivers(struct ve_struct* ve);
+static int	register_ve_tty_drivers(struct ve_struct* ve);
+static void	unregister_ve_tty_drivers(struct ve_struct* ve);
+static int	init_ve_tty_drivers(struct ve_struct *);
+static void	fini_ve_tty_drivers(struct ve_struct *);
+static void	clear_termios(struct tty_driver* driver );
+#ifdef CONFIG_INET
+static void	ve_mapped_devs_cleanup(struct ve_struct *ve);
+#endif
+
+static void vecalls_exit(void);
+
+struct ve_struct *__find_ve_by_id(envid_t veid)
+{
+	struct ve_struct *ve;
+	/* Scan with for_each_ve() for a matching veid; takes no lock and no reference itself. */
+	for_each_ve(ve) {
+		if (ve->veid == veid)
+			return ve;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(__find_ve_by_id);
+
+struct ve_struct *get_ve_by_id(envid_t veid)
+{
+	struct ve_struct *ve;
+	read_lock(&ve_list_lock);	/* lookup and get_ve() happen under the list read lock */
+	ve = __find_ve_by_id(veid);
+	get_ve(ve);	/* NOTE(review): also reached with ve == NULL; assumes get_ve() tolerates NULL -- confirm */
+	read_unlock(&ve_list_lock);
+	return ve;	/* NULL when no VE has this id */
+}
+EXPORT_SYMBOL(get_ve_by_id);
+
+/* real_put_ve() MUST be used instead of put_ve() inside vecalls.
+ * It drops one ve->counter reference (a NULL ve is ignored) and calls
+ * real_do_env_free() when that was the last one. */
+void real_do_env_free(struct ve_struct *ve);
+static inline void real_put_ve(struct ve_struct *ve)
+{
+	if (ve && atomic_dec_and_test(&ve->counter)) {
+		if (atomic_read(&ve->pcounter) > 0)
+			BUG();	/* last reference dropped while pcounter is still positive */
+		if (ve->is_running)
+			BUG();	/* last reference dropped while the VE is marked running */
+		real_do_env_free(ve);
+	}
+}
+
+static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat __user *buf)
+{
+	struct ve_struct *ve;
+	struct vz_cpu_stat *vstat;
+	int retval;
+	int i, cpu;
+	unsigned long tmp;
+	/* Collect CPU statistics of VE @veid into a vz_cpu_stat and copy it to @buf; 0 or -errno. */
+	if (!ve_is_super(get_exec_env()) && (veid != get_exec_env()->veid))
+		return -EPERM;
+	if (veid == 0)
+		return -ESRCH;
+	/* zeroed scratch copy: the per-cpu values below are accumulated into it */
+	vstat = kzalloc(sizeof(*vstat), GFP_KERNEL);
+	if (!vstat)
+		return -ENOMEM;
+	/* ve is looked up and read under ve_list_lock; no reference is taken on it */
+	retval = -ESRCH;
+	read_lock(&ve_list_lock);
+	ve = __find_ve_by_id(veid);
+	if (ve == NULL)
+		goto out_unlock;
+	for_each_online_cpu(cpu) {
+		struct ve_cpu_stats *st;
+
+		st = VE_CPU_STATS(ve, cpu);
+		vstat->user_jif += (unsigned long)cputime64_to_clock_t(st->user);
+		vstat->nice_jif += (unsigned long)cputime64_to_clock_t(st->nice);
+		vstat->system_jif += (unsigned long)cputime64_to_clock_t(st->system);
+		vstat->idle_clk += __ve_sched_get_idle_time(ve, cpu);
+	}
+	vstat->uptime_clk = get_cycles() - ve->start_cycles;
+	vstat->uptime_jif = (unsigned long)cputime64_to_clock_t(
+				get_jiffies_64() - ve->start_jiffies);
+	for (i = 0; i < 3; i++) {
+		tmp = ve->avenrun[i] + (FIXED_1/200);
+		vstat->avenrun[i].val_int = LOAD_INT(tmp);
+		vstat->avenrun[i].val_frac = LOAD_FRAC(tmp);
+	}
+	read_unlock(&ve_list_lock);
+	/* copy_to_user() is only called once the lock has been dropped */
+	retval = 0;
+	if (copy_to_user(buf, vstat, sizeof(*vstat)))
+		retval = -EFAULT;
+out_free:
+	kfree(vstat);
+	return retval;
+
+out_unlock:
+	read_unlock(&ve_list_lock);
+	goto out_free;
+}
+
+static int real_setdevperms(envid_t veid, unsigned type,
+		dev_t dev, unsigned mask)
+{
+	struct ve_struct *ve;
+	int err;
+	/* Apply set_device_perms_ve() to VE @veid; needs CAP_SETVEID and a non-zero veid. */
+	if (!capable(CAP_SETVEID) || veid == 0)
+		return -EPERM;
+
+	if ((ve = get_ve_by_id(veid)) == NULL)
+		return -ESRCH;
+	/* is_running is tested with op_sem held for reading; a VE that is not running yields -ESRCH */
+	down_read(&ve->op_sem);
+	err = -ESRCH;
+	if (ve->is_running)
+		err = set_device_perms_ve(veid, type, dev, mask);
+	up_read(&ve->op_sem);
+	real_put_ve(ve);	/* drops the reference taken by get_ve_by_id() */
+	return err;
+}
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * FS-related helpers to VE start/stop
+ *
+ **********************************************************************
+ **********************************************************************/
+
+#ifdef CONFIG_SYSCTL
+static ctl_table ve_sysctl_tables[] = {
+	/* top level: the "kernel" directory, whose children start at index 2 */
+	{
+		.ctl_name	= CTL_KERN,
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= &ve_sysctl_tables[2],
+	},
+	{ .ctl_name = 0 },	/* terminates the top level */
+	/* kernel/[vars]: register_ve_sysctltables() sets .data by position, so keep this order */
+	{
+		.ctl_name	= KERN_NODENAME,
+		.procname	= "hostname",
+		.maxlen 	= 64,
+		.mode		= 0644,
+		.proc_handler	= &proc_doutsstring,
+		.strategy	= &sysctl_string,
+	},
+	{
+		.ctl_name	= KERN_DOMAINNAME,
+		.procname	= "domainname",
+		.maxlen		= 64,
+		.mode		= 0644,
+		.proc_handler	= &proc_doutsstring,
+		.strategy	= &sysctl_string,
+	},
+	{
+		.ctl_name	= KERN_SHMMAX,
+		.procname	= "shmmax",
+		.maxlen		= sizeof(size_t),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name	= KERN_SHMALL,
+		.procname	= "shmall",
+		.maxlen		= sizeof(size_t),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name	= KERN_SHMMNI,
+		.procname	= "shmmni",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_MSGMAX,
+		.procname	= "msgmax",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_MSGMNI,
+		.procname	= "msgmni",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_MSGMNB,
+		.procname	= "msgmnb",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_SEM,
+		.procname	= "sem",
+		.maxlen		= 4 * sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0, }	/* terminates kernel/[vars] */
+};
+
+static int register_ve_sysctltables(struct ve_struct *ve)
+{
+	struct ctl_table_header *header;
+	ctl_table *root, *table;
+	/* Clone ve_sysctl_tables, point the clone at @ve's data and register it; 0 or -ENOMEM. */
+	VZTRACE("register_ve_sysctltables\n");
+
+	root = clone_sysctl_template(ve_sysctl_tables);
+	if (root == NULL)
+		goto out;
+	/* table[N] follows the order of the kernel/[vars] entries of ve_sysctl_tables */
+	table = root->child;
+	table[0].data = &ve->utsname->nodename;
+	table[1].data = &ve->utsname->domainname;
+	table[2].data = &ve->_shm_ctlmax;
+	table[3].data = &ve->_shm_ctlall;
+	table[4].data = &ve->_shm_ctlmni;
+	table[5].data = &ve->_msg_ctlmax;
+	table[6].data = &ve->_msg_ctlmni;
+	table[7].data = &ve->_msg_ctlmnb;
+	table[8].data = &ve->_sem_ctls[0];
+
+	/* insert at head to override kern entries */
+	header = register_sysctl_table(root, 1);
+	if (header == NULL)
+		goto out_free;
+	/* both are kept for unregister_ve_sysctltables() and free_ve_sysctltables() */
+	ve->kern_header = header;
+	ve->kern_table = root;
+	return 0;
+
+out_free:
+	free_sysctl_clone(root);
+out:
+	return -ENOMEM;
+}
+
+static inline void unregister_ve_sysctltables(struct ve_struct *ve)
+{
+	unregister_sysctl_table(ve->kern_header);	/* header stored by register_ve_sysctltables() */
+}
+
+static inline void free_ve_sysctltables(struct ve_struct *ve)
+{
+	free_sysctl_clone(ve->kern_table);	/* clone stored by register_ve_sysctltables() */
+}
+#endif
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE start: subsystems
+ *
+ **********************************************************************
+ **********************************************************************/
+
+static int init_ve_utsname(struct ve_struct *ve)
+{
+	ve->utsname = kmalloc(sizeof(*ve->utsname), GFP_KERNEL);
+	if (ve->utsname == NULL)
+		return -ENOMEM;
+	/* private utsname for @ve: a copy of system_utsname whose release is replaced by virt_utsname.release */
+	down_read(&uts_sem); /* protect the source */
+	memcpy(ve->utsname, &system_utsname, sizeof(*ve->utsname));
+	memcpy(ve->utsname->release, virt_utsname.release,
+			sizeof(virt_utsname.release));
+	up_read(&uts_sem);
+
+	return 0;
+}
+
+static void free_ve_utsname(struct ve_struct *ve)
+{
+	kfree(ve->utsname);	/* the copy allocated by init_ve_utsname() */
+	ve->utsname = NULL;
+}
+
+#ifdef CONFIG_INET
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static int init_fini_ve_mibs6(struct ve_struct *ve, int fini)
+{
+	if (fini)
+		goto fini;
+	/* init (fini == 0): allocate the six per-cpu IPv6 MIBs; a failure unwinds the ones already allocated */
+	if (!(ve->_ipv6_statistics[0] = alloc_percpu(struct ipstats_mib)))
+		goto out1;
+	if (!(ve->_ipv6_statistics[1] = alloc_percpu(struct ipstats_mib)))
+		goto out2;
+	if (!(ve->_icmpv6_statistics[0] = alloc_percpu(struct icmpv6_mib)))
+		goto out3;
+	if (!(ve->_icmpv6_statistics[1] = alloc_percpu(struct icmpv6_mib)))
+		goto out4;
+	if (!(ve->_udp_stats_in6[0] = alloc_percpu(struct udp_mib)))
+		goto out5;
+	if (!(ve->_udp_stats_in6[1] = alloc_percpu(struct udp_mib)))
+		goto out6;
+	return 0;
+	/* fini enters at the top of the ladder and frees all six; that path also ends in -ENOMEM */
+fini:
+	free_percpu(ve->_udp_stats_in6[1]);
+out6:
+	free_percpu(ve->_udp_stats_in6[0]);
+out5:
+	free_percpu(ve->_icmpv6_statistics[1]);
+out4:
+	free_percpu(ve->_icmpv6_statistics[0]);
+out3:
+	free_percpu(ve->_ipv6_statistics[1]);
+out2:
+	free_percpu(ve->_ipv6_statistics[0]);
+out1:
+	return -ENOMEM;
+}
+#else
+static int init_fini_ve_mibs6(struct ve_struct *ve, int fini) { return 0; }
+#endif
+
+static int init_fini_ve_mibs(struct ve_struct *ve, int fini)
+{
+	if (fini)
+		goto fini;
+	/* init (fini == 0): allocate the per-cpu IPv4 MIBs, then the IPv6 ones; a failure unwinds what exists */
+	if (!(ve->_net_statistics[0] = alloc_percpu(struct linux_mib)))
+		goto out1;
+	if (!(ve->_net_statistics[1] = alloc_percpu(struct linux_mib)))
+		goto out2;
+	if (!(ve->_ip_statistics[0] = alloc_percpu(struct ipstats_mib)))
+		goto out3;
+	if (!(ve->_ip_statistics[1] = alloc_percpu(struct ipstats_mib)))
+		goto out4;
+	if (!(ve->_icmp_statistics[0] = alloc_percpu(struct icmp_mib)))
+		goto out5;
+	if (!(ve->_icmp_statistics[1] = alloc_percpu(struct icmp_mib)))
+		goto out6;
+	if (!(ve->_tcp_statistics[0] = alloc_percpu(struct tcp_mib)))
+		goto out7;
+	if (!(ve->_tcp_statistics[1] = alloc_percpu(struct tcp_mib)))
+		goto out8;
+	if (!(ve->_udp_statistics[0] = alloc_percpu(struct udp_mib)))
+		goto out9;
+	if (!(ve->_udp_statistics[1] = alloc_percpu(struct udp_mib)))
+		goto out10;
+	if (init_fini_ve_mibs6(ve, fini))
+		goto out11;
+	return 0;
+	/* fini frees the IPv6 MIBs first, then falls through the whole ladder; that path also ends in -ENOMEM */
+fini:
+	init_fini_ve_mibs6(ve, fini);
+out11:
+	free_percpu(ve->_udp_statistics[1]);
+out10:
+	free_percpu(ve->_udp_statistics[0]);
+out9:
+	free_percpu(ve->_tcp_statistics[1]);
+out8:
+	free_percpu(ve->_tcp_statistics[0]);
+out7:
+	free_percpu(ve->_icmp_statistics[1]);
+out6:
+	free_percpu(ve->_icmp_statistics[0]);
+out5:
+	free_percpu(ve->_ip_statistics[1]);
+out4:
+	free_percpu(ve->_ip_statistics[0]);
+out3:
+	free_percpu(ve->_net_statistics[1]);
+out2:
+	free_percpu(ve->_net_statistics[0]);
+out1:
+	return -ENOMEM;
+}
+
+static inline int init_ve_mibs(struct ve_struct *ve)
+{
+	return init_fini_ve_mibs(ve, 0);	/* fini == 0: allocate; 0 or -ENOMEM */
+}
+
+static inline void fini_ve_mibs(struct ve_struct *ve)
+{
+	(void)init_fini_ve_mibs(ve, 1);	/* fini == 1: free everything; the -ENOMEM it returns is discarded */
+}
+
+/* Setup callback given to alloc_netdev(): clone templ_loopback_dev but keep dev->padded. */
+static void veloop_setup(struct net_device *dev)
+{
+	int keep_padded = dev->padded;
+	memcpy(dev, &templ_loopback_dev, sizeof(*dev));
+	dev->padded = keep_padded;
+}
+
+static int init_ve_netdev(void)
+{
+	struct ve_struct *ve;
+	struct net_device_stats *stats;
+	int err;
+	/* Reset the device list of the current VE and create and register its loopback device; 0 or -errno. */
+	ve = get_exec_env();
+	INIT_HLIST_HEAD(&ve->_net_dev_head);
+	ve->_net_dev_base = NULL;
+	ve->_net_dev_tail = &ve->_net_dev_base;
+
+	err = -ENOMEM;
+	ve->_loopback_dev = alloc_netdev(0, templ_loopback_dev.name,
+					 veloop_setup);
+	if (ve->_loopback_dev == NULL)
+		goto out;
+
+	ve->_loopback_stats = alloc_percpu(struct net_device_stats);
+	if (ve->_loopback_stats == NULL)
+		goto out_free_netdev;
+	if (loopback_dev.get_stats != NULL) {
+		stats = kzalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+		if (stats != NULL) {	/* an allocation failure here is tolerated, not reported */
+			ve->_loopback_dev->priv = stats;
+			ve->_loopback_dev->get_stats = loopback_dev.get_stats;
+			ve->_loopback_dev->destructor = loopback_dev.destructor;
+		}
+	}
+	err = register_netdev(ve->_loopback_dev);
+	if (err)
+		goto out_free_stats;
+	return 0;
+	/* unwind in the reverse order of the allocations above */
+out_free_stats:
+	if (ve->_loopback_dev->priv != NULL)
+		kfree(ve->_loopback_dev->priv);
+	free_percpu(ve->_loopback_stats);
+out_free_netdev:
+	free_netdev(ve->_loopback_dev);
+out:
+	return err;
+}
+
+static void fini_ve_netdev(void)
+{
+	struct ve_struct *ve;
+	struct net_device *dev;
+	/* Unregister and free every device found on dev_base, the VE's loopback device last. */
+	ve = get_exec_env();
+	while (1) {
+		rtnl_lock();
+		/*
+		 * loopback is special, it can be referenced in  fib's,
+		 * so it must be freed the last. Doing so is
+		 * sufficient to guarantee absence of such references.
+		 */
+		if (dev_base == ve->_loopback_dev)
+			dev = dev_base->next;
+		else
+			dev = dev_base;
+		if (dev == NULL)
+			break;	/* no other device left; rtnl stays locked for the code after the loop */
+		unregister_netdevice(dev);
+		rtnl_unlock();
+		free_netdev(dev);
+	}
+	unregister_netdevice(ve->_loopback_dev);
+	rtnl_unlock();
+	free_netdev(ve->_loopback_dev);
+	ve->_loopback_dev = NULL;
+
+	free_percpu(ve->_loopback_stats);
+	ve->_loopback_stats = NULL;
+}
+#else	/* !CONFIG_INET: the MIB and netdev helpers become no-ops */
+#define init_ve_mibs(ve)	(0)
+#define fini_ve_mibs(ve)	do { } while (0)
+#define init_ve_netdev()	(0)
+#define fini_ve_netdev()	do { } while (0)
+#endif	/* CONFIG_INET */
+
+static int prepare_proc_root(struct ve_struct *ve)
+{
+	struct proc_dir_entry *de;
+	/* Allocate ve->proc_root; the 6 extra bytes hold the name "/proc" plus its NUL right after the entry. */
+	de = kzalloc(sizeof(struct proc_dir_entry) + 6, GFP_KERNEL);
+	if (de == NULL)
+		return -ENOMEM;
+
+	memcpy(de + 1, "/proc", 6);
+	de->name = (char *)(de + 1);
+	de->namelen = 5;
+	de->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	de->nlink = 2;
+	atomic_set(&de->count, 1);
+	/* the entry is released by kfree(ve->proc_root) in free_ve_proc() */
+	ve->proc_root = de;
+	return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+/* Build the VE-local /proc: root entry, fs type, then kmsg, vz, net and the snmp entries. */
+static int init_ve_proc(struct ve_struct *ve)
+{
+	int err;
+	struct proc_dir_entry *de;
+
+	err = prepare_proc_root(ve);
+	if (err)
+		goto out_root;
+
+	err = register_ve_fs_type(ve, &proc_fs_type,
+			&ve->proc_fstype, &ve->proc_mnt);
+	if (err)
+		goto out_reg;
+
+	err = -ENOMEM;	/* reported for every failure below */
+	de = create_proc_entry("kmsg", S_IRUSR, NULL);
+	if (!de)
+		goto out_kmsg;
+	de->proc_fops = &proc_kmsg_operations;
+
+	/* create necessary /proc subdirs in VE local proc tree */
+	de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+	if (!de)
+		goto out_vz;
+
+	proc_net = proc_mkdir("net", NULL);
+	if (!proc_net)
+		goto out_net;
+
+	if (ve_snmp_proc_init())
+		goto out_snmp;
+
+	return 0;
+
+out_snmp:
+	remove_proc_entry("net", NULL);
+	proc_net = NULL;	/* as fini_ve_proc() does: keep no pointer to the removed entry */
+out_net:
+	remove_proc_entry("vz", NULL);
+out_vz:
+	remove_proc_entry("kmsg", NULL);
+out_kmsg:
+	unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
+	ve->proc_mnt = NULL;
+out_reg:
+	/* proc_fstype and proc_root are freed in real_put_ve -> free_ve_proc */
+out_root:
+	return err;
+}
+
+static void fini_ve_proc(struct ve_struct *ve)
+{
+	ve_snmp_proc_fini();	/* teardown runs in the reverse order of init_ve_proc() */
+	remove_proc_entry("net", NULL);
+	proc_net =  NULL;
+	remove_proc_entry("vz", NULL);
+	remove_proc_entry("kmsg", NULL);
+	unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
+	ve->proc_mnt = NULL;	/* proc_fstype and proc_root are left for free_ve_proc() */
+}
+
+static void free_ve_proc(struct ve_struct *ve)
+{
+	/* proc filesystem frees proc_dir_entries on remove_proc_entry() only,
+	   so we check that everything was removed and not lost */
+	if (ve->proc_root && ve->proc_root->subdir) {
+		struct proc_dir_entry *p = ve->proc_root;
+		printk(KERN_WARNING "VE: %d: proc entry /proc", ve->veid);
+		while ((p = p->subdir) != NULL)	/* follows only the ->subdir chain, one name per level */
+			printk("/%s", p->name);
+		printk(" is not removed!\n");
+	}
+	/* the leftovers are only reported; the root entry and the fs type are freed regardless */
+	kfree(ve->proc_root);
+	kfree(ve->proc_fstype);
+
+	ve->proc_fstype = NULL;
+	ve->proc_root = NULL;
+}
+#else	/* !CONFIG_PROC_FS: the per-VE proc helpers become no-ops */
+#define init_ve_proc(ve)	(0)
+#define fini_ve_proc(ve)	do { } while (0)
+#define free_ve_proc(ve)	do { } while (0)
+#endif	/* CONFIG_PROC_FS */
+
+#ifdef CONFIG_SYSCTL
+static int init_ve_sysctl(struct ve_struct *ve)
+{
+	int err;
+	/* Create /proc/sys, then register the kernel, devinet and addrconf sysctls for @ve; 0 or -errno. */
+#ifdef CONFIG_PROC_FS
+	err = -ENOMEM;
+	ve->proc_sys_root = proc_mkdir("sys", NULL);
+	if (ve->proc_sys_root == NULL)
+		goto out_proc;
+#endif
+	INIT_LIST_HEAD(&ve->sysctl_lh);
+	err = register_ve_sysctltables(ve);
+	if (err)
+		goto out_reg;
+
+	err = devinet_sysctl_init(ve);
+	if (err)
+		goto out_dev;
+
+	err = addrconf_sysctl_init(ve);
+	if (err)
+		goto out_dev6;
+
+	return 0;
+	/* NOTE(review): unlike free_ve_sysctl(), this unwind never calls devinet_sysctl_free() -- confirm nothing leaks */
+out_dev6:
+	devinet_sysctl_fini(ve);
+out_dev:
+	unregister_ve_sysctltables(ve);
+	free_ve_sysctltables(ve);
+out_reg:
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("sys", NULL);
+out_proc:
+#endif
+	return err;
+}
+
+static void fini_ve_sysctl(struct ve_struct *ve)
+{
+	addrconf_sysctl_fini(ve);	/* reverse order of init_ve_sysctl() */
+	devinet_sysctl_fini(ve);
+	unregister_ve_sysctltables(ve);	/* the tables themselves are freed later by free_ve_sysctl() */
+	remove_proc_entry("sys", NULL);	/* NOTE(review): not under CONFIG_PROC_FS, unlike the proc_mkdir() in init_ve_sysctl() */
+}
+
+static void free_ve_sysctl(struct ve_struct *ve)
+{
+	addrconf_sysctl_free(ve);	/* free step for each of the three tables unregistered by fini_ve_sysctl() */
+	devinet_sysctl_free(ve);
+	free_ve_sysctltables(ve);
+}
+#else	/* !CONFIG_SYSCTL: the per-VE sysctl helpers become no-ops */
+#define init_ve_sysctl(ve)	(0)
+#define fini_ve_sysctl(ve)	do { } while (0)
+#define free_ve_sysctl(ve)	do { } while (0)
+#endif	/* CONFIG_SYSCTL */
+
+#ifdef CONFIG_UNIX98_PTYS
+#include <linux/devpts_fs.h>
+
+/*
+ * DEVPTS needs virtualization: each environment should see its own list of
+ * pseudo-terminals.
+ * To implement it we need to have separate devpts superblocks for each
+ * VE, and each VE should mount its own one.
+ * Thus, separate vfsmount structures are required.
+ * To minimize intrusion into vfsmount lookup code, separate file_system_type
+ * structures are created.
+ *
+ * In addition to this, a patch for the character device itself is required,
+ * as the file system itself is used only for MINOR/MAJOR lookup.
+ */
+
+/* Allocate @ve's devpts_config (mode 0600) and register its devpts fs type; 0 or -errno. */
+static int init_ve_devpts(struct ve_struct *ve)
+{
+	int rc;
+
+	ve->devpts_config = kzalloc(sizeof(struct devpts_config), GFP_KERNEL);
+	if (ve->devpts_config == NULL)
+		return -ENOMEM;
+	ve->devpts_config->mode = 0600;
+
+	rc = register_ve_fs_type(ve, &devpts_fs_type,
+			&ve->devpts_fstype, &ve->devpts_mnt);
+	if (rc == 0)
+		return 0;
+	/* registration failed: release the config again */
+	kfree(ve->devpts_config);
+	ve->devpts_config = NULL;
+	return rc;
+}
+
+static void fini_ve_devpts(struct ve_struct *ve)
+{
+	unregister_ve_fs_type(ve->devpts_fstype, ve->devpts_mnt);
+	/* devpts_fstype is freed in real_put_ve -> free_ve_filesystems */
+	ve->devpts_mnt = NULL;
+	kfree(ve->devpts_config);	/* the config allocated by init_ve_devpts() */
+	ve->devpts_config = NULL;
+}
+#else	/* !CONFIG_UNIX98_PTYS: the per-VE devpts helpers become no-ops */
+#define init_ve_devpts(ve)	(0)
+#define fini_ve_devpts(ve)	do { } while (0)
+#endif	/* CONFIG_UNIX98_PTYS */
+
+static int init_ve_shmem(struct ve_struct *ve)
+{
+	return register_ve_fs_type(ve,	/* per-VE tmpfs type; the result is returned unchanged */
+				   &tmpfs_fs_type,
+				   &ve->shmem_fstype,
+				   &ve->shmem_mnt);
+}
+
+static void fini_ve_shmem(struct ve_struct *ve)
+{
+	unregister_ve_fs_type(ve->shmem_fstype, ve->shmem_mnt);	/* undoes init_ve_shmem() */
+	/* shmem_fstype is freed in real_put_ve -> free_ve_filesystems */
+	ve->shmem_mnt = NULL;
+}
+
+/* Allocate an empty sysfs_dirent of type SYSFS_ROOT and store it in ve->sysfs_root. */
+static inline int init_ve_sysfs_root(struct ve_struct *ve)
+{
+	struct sysfs_dirent *sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+
+	if (!sd)
+		return -ENOMEM;
+	sd->s_type = SYSFS_ROOT;
+	INIT_LIST_HEAD(&sd->s_sibling);
+	INIT_LIST_HEAD(&sd->s_children);
+
+	ve->sysfs_root = sd;
+	return 0;
+}
+
+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
+/* Register a per-VE class that reuses net_class's name, release and uevent; 0 or -errno. */
+static inline int init_ve_netclass(struct ve_struct *ve)
+{
+	struct class *cls = kzalloc(sizeof(*cls), GFP_KERNEL);
+	int rc;
+
+	if (cls == NULL)
+		return -ENOMEM;
+
+	cls->name = net_class.name;
+	cls->release = net_class.release;
+	cls->uevent = net_class.uevent;
+
+	rc = class_register(cls);
+	if (rc != 0) {
+		kfree(cls);
+		return rc;
+	}
+	ve->net_class = cls;
+	return 0;
+}
+
+static inline void fini_ve_netclass(struct ve_struct *ve)
+{
+	class_unregister(ve->net_class);	/* undoes init_ve_netclass() */
+	kfree(ve->net_class);
+	ve->net_class = NULL;
+}
+#else	/* !(CONFIG_NET && CONFIG_SYSFS): no per-VE net class */
+static inline int init_ve_netclass(struct ve_struct *ve) { return 0; }
+static inline void fini_ve_netclass(struct ve_struct *ve) { ; }
+#endif	/* CONFIG_NET && CONFIG_SYSFS */
+
+/* Set up @ve's sysfs pieces: the optional sysfs root and fs type, the two class
+ * subsystems, the net class and the tty class.  Returns 0 or a -errno. */
+static int init_ve_sysfs(struct ve_struct *ve)
+{
+	struct subsystem *subsys;
+	int err;
+
+#ifdef CONFIG_SYSFS
+	err = 0;
+	if (ve->features & VE_FEATURE_SYSFS) {
+		err = init_ve_sysfs_root(ve);
+		if (err != 0)
+			goto out;
+		err = register_ve_fs_type(ve, &sysfs_fs_type,
+				   &ve->sysfs_fstype, &ve->sysfs_mnt);
+	}
+	if (err != 0)
+		goto out_fs_type;
+#endif
+	err = -ENOMEM;	/* stays in force for both allocations below */
+	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
+	if (subsys == NULL)
+		goto out_class_obj;
+	/* ick, this is ugly, the things we go through to keep from showing up
+	 * in sysfs... */
+	memcpy(&subsys->kset.kobj.name, &class_obj_subsys.kset.kobj.name,
+			sizeof(subsys->kset.kobj.name));
+	subsys->kset.ktype = class_obj_subsys.kset.ktype;
+	subsys->kset.uevent_ops = class_obj_subsys.kset.uevent_ops;
+	subsystem_init(subsys);
+	if (!subsys->kset.subsys)
+		subsys->kset.subsys = subsys;
+	ve->class_obj_subsys = subsys;
+
+	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
+	if (subsys == NULL)
+		goto out_class_subsys;
+	/* ick, this is ugly, the things we go through to keep from showing up
+	 * in sysfs... */
+	memcpy(&subsys->kset.kobj.name, &class_subsys.kset.kobj.name,
+			sizeof(subsys->kset.kobj.name));
+	subsys->kset.ktype = class_subsys.kset.ktype;
+	subsys->kset.uevent_ops = class_subsys.kset.uevent_ops;
+	ve->class_subsys = subsys;
+	err = subsystem_register(subsys);
+	if (err != 0)
+		goto out_register;
+
+	err = init_ve_netclass(ve);
+	if (err)
+		goto out_nc;
+
+	ve->tty_class = init_ve_tty_class();
+	if (IS_ERR(ve->tty_class)) {
+		err = PTR_ERR(ve->tty_class);
+		ve->tty_class = NULL;
+		goto out_tty_class_register;
+	}
+
+	return err;
+
+out_tty_class_register:
+	fini_ve_netclass(ve);
+out_nc:
+	subsystem_unregister(subsys);
+out_register:
+	kfree(ve->class_subsys);
+out_class_subsys:
+	kfree(ve->class_obj_subsys);
+out_class_obj:
+#ifdef CONFIG_SYSFS
+	unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
+	ve->sysfs_mnt = NULL;	/* as fini_ve_sysfs() does: keep no stale mount pointer */
+	/* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
+out_fs_type:
+	kfree(ve->sysfs_root);
+	ve->sysfs_root = NULL;
+#endif
+	ve->class_subsys = NULL;
+	ve->class_obj_subsys = NULL;
+out:
+	return err;
+}
+
+static void fini_ve_sysfs(struct ve_struct *ve)
+{
+	fini_ve_tty_class(ve->tty_class);	/* teardown runs in the reverse order of init_ve_sysfs() */
+	fini_ve_netclass(ve);
+	subsystem_unregister(ve->class_subsys);
+	kfree(ve->class_subsys);
+	kfree(ve->class_obj_subsys);	/* only subsystem_init() was applied to this one, so it is freed without an unregister */
+
+	ve->class_subsys = NULL;
+	ve->class_obj_subsys = NULL;
+#ifdef CONFIG_SYSFS
+	unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
+	ve->sysfs_mnt = NULL;
+	kfree(ve->sysfs_root);
+	ve->sysfs_root = NULL;
+	/* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
+#endif
+}
+
+static void free_ve_filesystems(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSFS
+	kfree(ve->sysfs_fstype);	/* the *_fstype objects are the ones the fini_ve_*() helpers leave allocated */
+	ve->sysfs_fstype = NULL;
+#endif
+	kfree(ve->shmem_fstype);
+	ve->shmem_fstype = NULL;
+
+	kfree(ve->devpts_fstype);
+	ve->devpts_fstype = NULL;
+	/* free_ve_proc() frees proc_root and proc_fstype */
+	free_ve_proc(ve);
+}
+
+static int init_printk(struct ve_struct *ve)
+{
+	struct ve_prep_printk {
+		wait_queue_head_t       log_wait;
+		unsigned long           log_start;
+		unsigned long           log_end;
+		unsigned long           logged_chars;
+	} *tmp;
+	/* One zeroed allocation backs the four per-VE log fields set below; 0 or -ENOMEM. */
+	tmp = kzalloc(sizeof(struct ve_prep_printk), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+	/* log_wait is the first member, so fini_printk()'s kfree(ve->_log_wait) releases the whole block */
+	init_waitqueue_head(&tmp->log_wait);
+	ve->_log_wait = &tmp->log_wait;
+	ve->_log_start = &tmp->log_start;
+	ve->_log_end = &tmp->log_end;
+	ve->_logged_chars = &tmp->logged_chars;
+	/* ve->log_buf will be initialized later by ve_log_init() */
+	return 0;
+}
+
+static void fini_printk(struct ve_struct *ve)
+{
+	/*
+	 * there is no spinlock protection here because nobody can use
+	 * log_buf at the moments when this code is called.
+	 */
+	kfree(ve->log_buf);
+	kfree(ve->_log_wait);	/* address of the first member of the block allocated in init_printk() */
+}
+
+static void fini_venet(struct ve_struct *ve)
+{
+#ifdef CONFIG_INET
+	tcp_v4_kill_ve_sockets(ve);
+	ve_mapped_devs_cleanup(ve);
+	synchronize_net();	/* called only after both cleanups above */
+#endif	/* CONFIG_INET; without it this function does nothing */
+}
+
+static int init_ve_sched(struct ve_struct *ve, unsigned int vcpus)
+{
+#ifdef CONFIG_FAIRSCHED
+	int err;
+	/* Create fairsched node ve->veid, set its vcpus and move current into it. */
+	/*
+	 * We refuse to switch to an already existing node since nodes
+	 * keep a pointer to their ve_struct...
+	 */
+	err = sys_fairsched_mknod(0, 1, ve->veid);
+	if (err < 0) {
+		printk(KERN_WARNING "Can't create fairsched node %d\n",
+				ve->veid);
+		return err;
+	}
+	err = sys_fairsched_vcpus(ve->veid, vcpus);
+	if (err) {
+		printk(KERN_WARNING "Can't set fairsched vcpus on node %d\n",
+				ve->veid);
+		goto cleanup;
+	}
+	err = sys_fairsched_mvpr(current->pid, ve->veid);
+	if (err) {
+		printk(KERN_WARNING "Can't switch to fairsched node %d\n",
+				ve->veid);
+		goto cleanup;
+	}
+#endif
+	ve_sched_attach(ve);	/* the only step taken when CONFIG_FAIRSCHED is off; @vcpus is unused then */
+	return 0;
+
+#ifdef CONFIG_FAIRSCHED
+cleanup:
+	if (sys_fairsched_rmnod(ve->veid))	/* remove the node created above; err is still returned */
+		printk(KERN_ERR "Can't clean fairsched node %d\n",
+				ve->veid);
+	return err;
+#endif
+}
+
+static void fini_ve_sched(struct ve_struct *ve)
+{
+#ifdef CONFIG_FAIRSCHED
+	if (task_vsched_id(current) == ve->veid)	/* current is moved to fairsched_init_node before the removal */
+		if (sys_fairsched_mvpr(current->pid, fairsched_init_node.id))
+			printk(KERN_WARNING "Can't leave fairsched node %d\n",
+					ve->veid);
+	if (sys_fairsched_rmnod(ve->veid))	/* failures are only logged */
+		printk(KERN_ERR "Can't remove fairsched node %d\n",
+				ve->veid);
+#endif
+}
+
+/*
+ * Compute the VE feature set: bits the caller knows about are taken from its
+ * feature_mask, every other bit keeps its VE_FEATURES_DEF value.
+ */
+static __u64 get_ve_features(env_create_param_t *data, int datalen)
+{
+	/* a vzctl passing less than env_create_param3 is aware of VE_FEATURES_OLD only */
+	__u64 known = VE_FEATURES_OLD;
+	__u64 requested, defaults;
+
+	if (!(datalen < sizeof(struct env_create_param3)))
+		known = data->known_features;
+
+	requested = data->feature_mask & known;
+	defaults = VE_FEATURES_DEF & ~known;
+	return requested | defaults;
+}
+
+static int init_ve_struct(struct ve_struct *ve, envid_t veid,
+		u32 class_id, env_create_param_t *data, int datalen,
+		struct task_struct *init_tsk)
+{
+	int n;
+	/* Fill in the identity, feature and start-time fields of @ve; always returns 0. */
+	(void)get_ve(ve);
+	ve->veid = veid;
+	ve->class_id = class_id;
+	ve->init_entry = init_tsk;
+	ve->features = get_ve_features(data, datalen);
+	INIT_LIST_HEAD(&ve->vetask_lh);
+	init_rwsem(&ve->op_sem);
+#ifdef CONFIG_NET
+	ve->ifindex = -1;
+#endif
+	/* every bucket of the per-VE uid hash starts out empty */
+	for(n = 0; n < UIDHASH_SZ_VE; ++n)
+		INIT_LIST_HEAD(&ve->uidhash_table[n]);
+	/* the timespec comes from init_tsk; jiffies and cycles are sampled now */
+	ve->start_timespec = ve->init_entry->start_time;
+	/* The value is wrong, but it is never compared to process
+	 * start times */
+	ve->start_jiffies = get_jiffies_64();
+	ve->start_cycles = get_cycles();
+	ve->virt_pids = glob_virt_pids;
+
+	return 0;
+}
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * /proc/meminfo virtualization
+ *
+ **********************************************************************
+ **********************************************************************/
+static int ve_set_meminfo(envid_t veid, unsigned long val)
+{
+#ifdef CONFIG_USER_RESOURCE
+	/* Look the VE up by id; the lookup is paired with real_put_ve() below. */
+	struct ve_struct *env = get_ve_by_id(veid);
+
+	if (env == NULL)
+		return -EINVAL;
+	/* Record the new /proc/meminfo virtualization value for this VE. */
+	env->meminfo_val = val;
+	real_put_ve(env);
+	return 0;
+#else
+	return -ENOTTY;
+#endif
+}
+/* Reset the VE's meminfo virtualization value to 0; never fails. */
+static int init_ve_meminfo(struct ve_struct *ve)
+{
+	ve->meminfo_val = 0;
+	return 0;
+}
+/* Nothing to undo for init_ve_meminfo(); empty cleanup counterpart. */
+static inline void fini_ve_meminfo(struct ve_struct *ve)
+{
+}
+/* Copy tsk's fs root pair into ve and pass it to mark_tree_virtual(). */
+static void set_ve_root(struct ve_struct *ve, struct task_struct *tsk)
+{
+	read_lock(&tsk->fs->lock);	/* read both fields under one lock hold */
+	ve->fs_rootmnt = tsk->fs->rootmnt;	/* no reference is taken here */
+	ve->fs_root = tsk->fs->root;
+	read_unlock(&tsk->fs->lock);
+	mark_tree_virtual(ve->fs_rootmnt, ve->fs_root);
+}
+/* ve->ve_cap_bset = tsk's effective capabilities with CAP_SETVEID lowered. */
+static void set_ve_caps(struct ve_struct *ve, struct task_struct *tsk)
+{
+	/* required for real_setdevperms from register_ve_<fs> above */
+	memcpy(&ve->ve_cap_bset, &tsk->cap_effective, sizeof(kernel_cap_t));
+	cap_lower(ve->ve_cap_bset, CAP_SETVEID);
+}
+/* Link ve onto ve_list_head; fails with -EEXIST if the veid is listed. */
+static int ve_list_add(struct ve_struct *ve)
+{
+	int ret = -EEXIST;
+
+	write_lock_irq(&ve_list_lock);
+	/* The duplicate check and the insertion share one lock hold. */
+	if (__find_ve_by_id(ve->veid) == NULL) {
+		list_add(&ve->ve_list, &ve_list_head);
+		nr_ve++;
+		ret = 0;
+	}
+	write_unlock_irq(&ve_list_lock);
+
+	return ret;
+}
+/* Unlink ve from the VE list and decrement nr_ve, under ve_list_lock. */
+static void ve_list_del(struct ve_struct *ve)
+{
+	write_lock_irq(&ve_list_lock);
+	list_del(&ve->ve_list);
+	nr_ve--;
+	write_unlock_irq(&ve_list_lock);
+}
+/* Apply cap_mask() with ve->ve_cap_bset to all three capability sets of tsk. */
+static void set_task_ve_caps(struct task_struct *tsk, struct ve_struct *ve)
+{
+	spin_lock(&task_capability_lock);	/* all three sets change together */
+	cap_mask(tsk->cap_effective, ve->ve_cap_bset);
+	cap_mask(tsk->cap_inheritable, ve->ve_cap_bset);
+	cap_mask(tsk->cap_permitted, ve->ve_cap_bset);
+	spin_unlock(&task_capability_lock);
+}
+/* Re-home tsk from VE old to VE new: caps, VE links, counters, references. */
+void ve_move_task(struct task_struct *tsk, struct ve_struct *new,
+		struct ve_struct *old)
+{
+	/* this prohibits ptracing of task entered to VE from host system */
+	tsk->mm->vps_dumpable = 0;	/* NOTE(review): assumes tsk->mm != NULL */
+	/* setup capabilities before enter */
+	set_task_ve_caps(tsk, new);
+
+	write_lock_irq(&tasklist_lock);
+	VE_TASK_INFO(tsk)->owner_env = new;
+	VE_TASK_INFO(tsk)->exec_env = new;
+	REMOVE_VE_LINKS(tsk);
+	SET_VE_LINKS(tsk);
+
+	atomic_dec(&old->pcounter);	/* per-VE counters follow the task */
+	atomic_inc(&new->pcounter);
+	real_put_ve(old);
+	get_ve(new);
+	write_unlock_irq(&tasklist_lock);
+}
+
+EXPORT_SYMBOL(ve_move_task);
+
+#ifdef CONFIG_VE_IPTABLES
+extern int init_netfilter(void);
+extern void fini_netfilter(void);
+#define init_ve_netfilter()	init_netfilter()
+#define fini_ve_netfilter()	fini_netfilter()
+/* KSYMIPTINIT: run a module's per-VE init when mask asks for it; ret 1 -> 0. */
+#define KSYMIPTINIT(mask, ve, full_mask, mod, name, args)	\
+({								\
+	int ret = 0;						\
+	if (VE_IPT_CMP(mask, full_mask) &&			\
+		VE_IPT_CMP((ve)->_iptables_modules, 		\
+			full_mask & ~(full_mask##_MOD))) {	\
+		ret = KSYMERRCALL(1, mod, name, args);		\
+		if (ret == 0)					\
+			(ve)->_iptables_modules |=		\
+					full_mask##_MOD;	\
+		if (ret == 1)					\
+			ret = 0;				\
+	}							\
+	ret;							\
+})
+/* KSYMIPTFINI: call the module's fini hook if mask passes the _MOD check. */
+#define KSYMIPTFINI(mask, full_mask, mod, name, args)		\
+({								\
+ 	if (VE_IPT_CMP(mask, full_mask##_MOD))			\
+		KSYMSAFECALL_VOID(mod, name, args);		\
+})
+
+/* Init (init_or_cleanup != 0) or tear down ve's per-VE iptables modules. */
+static int do_ve_iptables(struct ve_struct *ve, __u64 init_mask,
+		int init_or_cleanup)
+{
+	int err;
+
+	/* Remove when userspace will start supplying IPv6-related bits. */
+	init_mask &= ~VE_IP_IPTABLES6;
+	init_mask &= ~VE_IP_FILTER6;
+	init_mask &= ~VE_IP_MANGLE6;
+	init_mask &= ~VE_IP_IPTABLE_NAT_MOD;
+	if ((init_mask & VE_IP_IPTABLES) == VE_IP_IPTABLES)
+		init_mask |= VE_IP_IPTABLES6;
+	if ((init_mask & VE_IP_FILTER) == VE_IP_FILTER)
+		init_mask |= VE_IP_FILTER6;
+	if ((init_mask & VE_IP_MANGLE) == VE_IP_MANGLE)
+		init_mask |= VE_IP_MANGLE6;
+	if ((init_mask & VE_IP_NAT) == VE_IP_NAT)
+		init_mask |= VE_IP_IPTABLE_NAT;
+
+	err = 0;	/* the pure cleanup path always returns 0 */
+	if (!init_or_cleanup)
+		goto cleanup;
+
+	/* init part */
+#if defined(CONFIG_IP_NF_IPTABLES) || \
+    defined(CONFIG_IP_NF_IPTABLES_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
+			ip_tables, init_iptables, ());
+	if (err < 0)
+		goto err_iptables;
+#endif
+#if defined(CONFIG_IP6_NF_IPTABLES) || \
+    defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES6,
+			ip6_tables, init_ip6tables, ());
+	if (err < 0)
+		goto err_ip6tables;
+#endif
+#if defined(CONFIG_IP_NF_CONNTRACK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK,
+			ip_conntrack, init_iptable_conntrack, ());
+	if (err < 0)
+		goto err_iptable_conntrack;
+#endif
+#if defined(CONFIG_IP_NF_FTP) || \
+    defined(CONFIG_IP_NF_FTP_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_FTP,
+			ip_conntrack_ftp, init_iptable_ftp, ());
+	if (err < 0)
+		goto err_iptable_ftp;
+#endif
+#if defined(CONFIG_IP_NF_IRC) || \
+    defined(CONFIG_IP_NF_IRC_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_IRC,
+			ip_conntrack_irc, init_iptable_irc, ());
+	if (err < 0)
+		goto err_iptable_irc;
+#endif
+#if defined(CONFIG_IP_NF_NAT) || \
+    defined(CONFIG_IP_NF_NAT_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT,
+			ip_nat, ip_nat_init, ());
+	if (err < 0)
+		goto err_iptable_nat;
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLE_NAT,
+			iptable_nat, init_iptable_nat, ());
+	if (err < 0)
+		goto err_iptable_nat2;
+#endif
+#if defined(CONFIG_IP_NF_NAT_FTP) || \
+    defined(CONFIG_IP_NF_NAT_FTP_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_FTP,
+			ip_nat_ftp, init_iptable_nat_ftp, ());
+	if (err < 0)
+		goto err_iptable_nat_ftp;
+#endif
+#if defined(CONFIG_IP_NF_NAT_IRC) || \
+    defined(CONFIG_IP_NF_NAT_IRC_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_IRC,
+			ip_nat_irc, init_iptable_nat_irc, ());
+	if (err < 0)
+		goto err_iptable_nat_irc;
+#endif
+#if defined(CONFIG_IP_NF_FILTER) || \
+    defined(CONFIG_IP_NF_FILTER_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER,
+			iptable_filter,	init_iptable_filter, ());
+	if (err < 0)
+		goto err_iptable_filter;
+#endif
+#if defined(CONFIG_IP6_NF_FILTER) || \
+    defined(CONFIG_IP6_NF_FILTER_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER6,
+			ip6table_filter, init_ip6table_filter, ());
+	if (err < 0)
+		goto err_ip6table_filter;
+#endif
+#if defined(CONFIG_IP_NF_MANGLE) || \
+    defined(CONFIG_IP_NF_MANGLE_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE,
+			iptable_mangle,	init_iptable_mangle, ());
+	if (err < 0)
+		goto err_iptable_mangle;
+#endif
+#if defined(CONFIG_IP6_NF_MANGLE) || \
+    defined(CONFIG_IP6_NF_MANGLE_MODULE)
+	err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE6,
+			ip6table_mangle, init_ip6table_mangle, ());
+	if (err < 0)
+		goto err_ip6table_mangle;
+#endif
+	return 0;
+
+/* ------------------------------------------------------------------------- */
+/* Teardown runs in reverse init order; the err_* labels enter it part-way. */
+cleanup:
+#if defined(CONFIG_IP6_NF_MANGLE) || \
+    defined(CONFIG_IP6_NF_MANGLE_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE6,
+			ip6table_mangle, fini_ip6table_mangle, ());
+err_ip6table_mangle:
+#endif
+#if defined(CONFIG_IP_NF_MANGLE) || \
+    defined(CONFIG_IP_NF_MANGLE_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE,
+			iptable_mangle,	fini_iptable_mangle, ());
+err_iptable_mangle:
+#endif
+#if defined(CONFIG_IP6_NF_FILTER) || \
+    defined(CONFIG_IP6_NF_FILTER_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER6,
+			ip6table_filter, fini_ip6table_filter, ());
+err_ip6table_filter:
+#endif
+#if defined(CONFIG_IP_NF_FILTER) || \
+    defined(CONFIG_IP_NF_FILTER_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER,
+			iptable_filter,	fini_iptable_filter, ());
+err_iptable_filter:
+#endif
+#if defined(CONFIG_IP_NF_NAT_IRC) || \
+    defined(CONFIG_IP_NF_NAT_IRC_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_IRC,
+			ip_nat_irc, fini_iptable_nat_irc, ());
+err_iptable_nat_irc:
+#endif
+#if defined(CONFIG_IP_NF_NAT_FTP) || \
+    defined(CONFIG_IP_NF_NAT_FTP_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_FTP,
+			ip_nat_ftp, fini_iptable_nat_ftp, ());
+err_iptable_nat_ftp:
+#endif
+#if defined(CONFIG_IP_NF_NAT) || \
+    defined(CONFIG_IP_NF_NAT_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLE_NAT,
+			iptable_nat, fini_iptable_nat, ());
+err_iptable_nat2:
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT,
+			ip_nat, ip_nat_cleanup, ());
+err_iptable_nat:
+#endif
+#if defined(CONFIG_IP_NF_IRC) || \
+    defined(CONFIG_IP_NF_IRC_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_IRC,
+			ip_conntrack_irc, fini_iptable_irc, ());
+err_iptable_irc:
+#endif
+#if defined(CONFIG_IP_NF_FTP) || \
+    defined(CONFIG_IP_NF_FTP_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_FTP,
+			ip_conntrack_ftp, fini_iptable_ftp, ());
+err_iptable_ftp:
+#endif
+#if defined(CONFIG_IP_NF_CONNTRACK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK,
+			ip_conntrack, fini_iptable_conntrack, ());
+err_iptable_conntrack:
+#endif
+#if defined(CONFIG_IP6_NF_IPTABLES) || \
+    defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES6,
+			ip6_tables, fini_ip6tables, ());
+err_ip6tables:
+#endif
+#if defined(CONFIG_IP_NF_IPTABLES) || \
+    defined(CONFIG_IP_NF_IPTABLES_MODULE)
+	KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
+			ip_tables, fini_iptables, ());
+err_iptables:
+#endif
+	ve->_iptables_modules = 0;	/* nothing is marked initialised any more */
+
+	return err;
+}
+/* Initialise ve's iptables modules per init_mask; 0 or a negative error. */
+static inline int init_ve_iptables(struct ve_struct *ve, __u64 init_mask)
+{
+	return do_ve_iptables(ve, init_mask, 1);
+}
+/* Run the cleanup half of do_ve_iptables() for ve; its result is ignored. */
+static inline void fini_ve_iptables(struct ve_struct *ve, __u64 init_mask)
+{
+	(void)do_ve_iptables(ve, init_mask, 0);
+}
+
+#else	/* !CONFIG_VE_IPTABLES: init stubs yield 0, fini stubs do nothing */
+#define init_ve_iptables(x, y)	(0)
+#define fini_ve_iptables(x, y)	do { } while (0)
+#define init_ve_netfilter()	(0)
+#define fini_ve_netfilter()	do { } while (0)
+#endif	/* CONFIG_VE_IPTABLES */
+/* Allocate ve's per-CPU statistics area; -ENOMEM when that fails. */
+static inline int init_ve_cpustats(struct ve_struct *ve)
+{
+	ve->cpu_stats = alloc_percpu(struct ve_cpu_stats);
+	return ve->cpu_stats ? 0 : -ENOMEM;
+}
+/* Free ve's per-CPU statistics area and clear the pointer. */
+static inline void free_ve_cpustats(struct ve_struct *ve)
+{
+	free_percpu(ve->cpu_stats);
+	ve->cpu_stats = NULL;
+}
+/* Create VE veid with current as its init; returns veid or a -errno. */
+static int do_env_create(envid_t veid, unsigned int flags, u32 class_id,
+			 env_create_param_t *data, int datalen)
+{
+	struct task_struct *tsk;
+	struct ve_struct *old;
+	struct ve_struct *old_exec;
+	struct ve_struct *ve;
+	__u64 init_mask;
+	int err;
+
+	tsk = current;
+	old = VE_TASK_INFO(tsk)->owner_env;
+	/* the caller must be a thread group leader without a controlling tty */
+	if (!thread_group_leader(tsk))
+		return -EINVAL;
+
+	if (tsk->signal->tty) {
+		printk("ERR: VE init has controlling terminal\n");
+		return -EINVAL;
+	}
+	if (tsk->signal->pgrp != tsk->pid || tsk->signal->session != tsk->pid) {
+		int may_setsid;
+		read_lock(&tasklist_lock);
+		may_setsid = (find_pid(PIDTYPE_PGID, tsk->pid) == NULL);
+		read_unlock(&tasklist_lock);
+		if (!may_setsid) {
+			printk("ERR: VE init is process group leader\n");
+			return -EINVAL;
+		}
+	}
+
+	/* NOTE(review): data is used unchecked below, yet tested at init_mask */
+	VZTRACE("%s: veid=%d classid=%d pid=%d\n",
+		__FUNCTION__, veid, class_id, current->pid);
+
+	err = -ENOMEM;
+	ve = kzalloc(sizeof(struct ve_struct), GFP_KERNEL);
+	if (ve == NULL)
+		goto err_struct;
+
+	init_ve_struct(ve, veid, class_id, data, datalen, tsk);
+	/* module ref is taken below, once the kfree()-only error paths are behind */
+	down_write(&ve->op_sem);
+	if (flags & VE_LOCK)
+		ve->is_locked = 1;
+
+	/*
+	 * this should be done before adding to list
+	 * because if calc_load_ve finds this ve in
+	 * list it will be very surprised
+	 */
+	if ((err = init_ve_cpustats(ve)) < 0)
+		goto err_cpu_stats;
+
+	if ((err = ve_list_add(ve)) < 0)
+		goto err_exist;
+	__module_get(THIS_MODULE);	/* released via real_do_env_free() */
+	/* this should be done before context switching */
+	if ((err = init_printk(ve)) < 0)
+		goto err_log_wait;
+
+	old_exec = set_exec_env(ve);
+
+	if ((err = init_ve_sched(ve, data->total_vcpus)) < 0)
+		goto err_sched;
+
+	/* move user to VE */
+	if ((err = set_user(0, 0)) < 0)
+		goto err_set_user;
+
+	set_ve_root(ve, tsk);
+
+	if ((err = init_ve_utsname(ve)))
+		goto err_utsname;
+
+	if ((err = init_ve_mibs(ve)))
+		goto err_mibs;
+
+	if ((err = init_ve_proc(ve)))
+		goto err_proc;
+
+	if ((err = init_ve_sysctl(ve)))
+		goto err_sysctl;
+
+	if ((err = init_ve_sysfs(ve)))
+		goto err_sysfs;
+
+	if ((err = ve_arp_init(ve)) < 0)
+		goto err_route;
+	if ((err = ve_ndisc_init(ve)) < 0)
+		goto err_route;
+
+	if ((err = init_ve_route(ve)) < 0)
+		goto err_route;
+	/* NOTE(review): a failure below skips fini_ve_route() at err_route */
+	if ((err = init_ve_route6(ve)) < 0)
+		goto err_route;
+
+	if ((err = init_ve_netdev()))
+		goto err_dev;
+
+	if ((err = init_ve_tty_drivers(ve)) < 0)
+		goto err_tty;
+
+	if ((err = init_ve_shmem(ve)))
+		goto err_shmem;
+
+	if ((err = init_ve_devpts(ve)))
+		goto err_devpts;
+
+	if((err = init_ve_meminfo(ve)))
+		goto err_meminf;
+
+	/* init SYSV IPC variables */
+	if ((err = init_ve_ipc(ve)) < 0)
+		goto err_ipc;
+
+	set_ve_caps(ve, tsk);
+
+	/* It is safe to initialize netfilter here as routing initialization and
+	   interface setup will be done below. This means that NO skb can be
+	   passed inside. Den */
+	/* iptables ve initialization for non ve0;
+	   ve0 init is in module_init */
+	if ((err = init_ve_netfilter()) < 0)
+		goto err_netfilter;
+
+	init_mask = data ? data->iptables_mask : VE_IP_DEFAULT;
+	if ((err = init_ve_iptables(ve, init_mask)) < 0)
+		goto err_iptables;
+
+	if ((err = alloc_vpid(tsk->pid, 1)) < 0)
+		goto err_vpid;
+
+	if ((err = ve_hook_iterate_init(VE_SS_CHAIN, ve)) < 0)
+		goto err_ve_hook;
+
+	/* finally: set vpids and move inside */
+	ve_move_task(tsk, ve, old);
+
+	set_virt_pid(tsk, 1);
+	set_virt_tgid(tsk, 1);
+
+	set_special_pids(tsk->pid, tsk->pid);
+	current->signal->tty_old_pgrp = 0;
+	set_virt_pgid(tsk, 1);
+	set_virt_sid(tsk, 1);
+
+	ve->is_running = 1;
+	up_write(&ve->op_sem);
+
+	printk(KERN_INFO "VE: %d: started\n", veid);
+	return veid;
+	/* Error unwinding: the labels below undo the setup in reverse order. */
+err_ve_hook:
+	free_vpid(1, ve);
+err_vpid:
+	fini_venet(ve);
+	fini_ve_iptables(ve, init_mask);
+err_iptables:
+	fini_ve_netfilter();
+err_netfilter:
+	fini_ve_ipc(ve);
+err_ipc:
+	fini_ve_meminfo(ve);
+err_meminf:
+	fini_ve_devpts(ve);
+err_devpts:
+	fini_ve_shmem(ve);
+err_shmem:
+	fini_ve_tty_drivers(ve);
+err_tty:
+	fini_ve_netdev();
+err_dev:
+	fini_ve_route(ve);
+	fini_ve_route6(ve);
+err_route:
+	ve_ndisc_fini(ve);
+	ve_arp_fini(ve);
+	fini_ve_sysfs(ve);
+err_sysfs:
+	fini_ve_sysctl(ve);
+err_sysctl:
+	fini_ve_proc(ve);
+err_proc:
+	clean_device_perms_ve(ve->veid);
+	fini_ve_mibs(ve);
+err_mibs:
+	/* free_ve_utsname() is called inside real_put_ve() */ ;
+err_utsname:
+	/* It is safe to restore current->envid here because
+	 * ve_fairsched_detach does not use current->envid. */
+	/* Really fairsched code uses current->envid in sys_fairsched_mknod
+	 * only.  It is correct if sys_fairsched_mknod is called from
+	 * userspace.  If sys_fairsched_mknod is called from
+	 * ve_fairsched_attach, then node->envid and node->parent_node->envid
+	 * are explicitly set to valid value after the call. */
+	/* FIXME */
+	VE_TASK_INFO(tsk)->owner_env = old;
+	VE_TASK_INFO(tsk)->exec_env = old_exec;
+	/* move user back */
+	if (set_user(0, 0) < 0)
+		printk(KERN_WARNING"Can't restore UID\n");
+
+err_set_user:
+	fini_ve_sched(ve);
+err_sched:
+	(void)set_exec_env(old_exec);
+
+	/* we can jump here having incorrect envid */
+	VE_TASK_INFO(tsk)->owner_env = old;
+	fini_printk(ve);
+err_log_wait:
+	/* cpustats will be freed in do_env_free */
+	ve_list_del(ve);
+	up_write(&ve->op_sem);
+
+	real_put_ve(ve);
+err_struct:
+	printk(KERN_INFO "VE: %d: failed to start with err=%d\n", veid, err);
+	return err;
+	/* Early failures: ve holds no module reference yet, so just free it. */
+err_exist:
+	free_ve_cpustats(ve);
+err_cpu_stats:
+	kfree(ve);
+	goto err_struct;
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE start/stop callbacks
+ *
+ **********************************************************************
+ **********************************************************************/
+/* Test for, create or enter VE veid as selected by flags; returns status. */
+int real_env_create(envid_t veid, unsigned flags, u32 class_id,
+			env_create_param_t *data, int datalen)
+{
+	int status;
+	struct ve_struct *ve;
+	/* flags == 0 is a query: report the id of the caller's exec VE */
+	if (!flags) {
+		status = get_exec_env()->veid;
+		goto out;
+	}
+
+	status = -EPERM;
+	if (!capable(CAP_SETVEID))
+		goto out;
+
+	status = -EINVAL;	/* VE_TEST excludes VE_ENTER and VE_CREATE */
+	if ((flags & VE_TEST) && (flags & (VE_ENTER|VE_CREATE)))
+		goto out;
+
+	status = -EINVAL;
+	ve = get_ve_by_id(veid);
+	if (ve) {
+		if (flags & VE_TEST) {
+			status = 0;
+			goto out_put;
+		}
+		if (flags & VE_EXCLUSIVE) {
+			status = -EACCES;
+			goto out_put;
+		}
+		if (flags & VE_CREATE) {	/* exists already: enter instead */
+			flags &= ~VE_CREATE;
+			flags |= VE_ENTER;
+		}
+	} else {
+		if (flags & (VE_TEST|VE_ENTER)) {
+			status = -ESRCH;
+			goto out;
+		}
+	}
+
+	if (flags & VE_CREATE) {
+		status = do_env_create(veid, flags, class_id, data, datalen);
+		goto out;
+	} else if (flags & VE_ENTER)
+		status = do_env_enter(ve, flags);
+
+	/* else: returning EINVAL */
+	/* NOTE(review): ve can be NULL here - confirm real_put_ve() allows it */
+out_put:
+	real_put_ve(ve);
+out:
+	return status;
+}
+/* Move current into the running VE ve; returns ve's id or a -errno. */
+static int do_env_enter(struct ve_struct *ve, unsigned int flags)
+{
+	struct task_struct *tsk = current;
+	int err;
+
+	VZTRACE("%s: veid=%d\n", __FUNCTION__, ve->veid);
+
+	err = -EBUSY;	/* returned when ve is not running or is locked */
+	down_read(&ve->op_sem);
+	if (!ve->is_running)
+		goto out_up;
+	if (ve->is_locked && !(flags & VE_SKIPLOCK))
+		goto out_up;
+
+#ifdef CONFIG_FAIRSCHED
+	err = sys_fairsched_mvpr(current->pid, ve->veid);
+	if (err)
+		goto out_up;
+#endif
+
+	ve_sched_attach(ve);
+	ve_move_task(current, ve, VE_TASK_INFO(tsk)->owner_env);
+	err = VE_TASK_INFO(tsk)->owner_env->veid;	/* owner_env is ve now */
+
+out_up:
+	up_read(&ve->op_sem);
+	return err;
+}
+/* Tear down ve's subsystems in ve's exec context, unlist ve and put it. */
+static void env_cleanup(struct ve_struct *ve)
+{
+	struct ve_struct *old_ve;
+
+	VZTRACE("real_do_env_cleanup\n");
+
+	down_read(&ve->op_sem);
+	old_ve = set_exec_env(ve);	/* restored before fini_printk() below */
+
+	ve_hook_iterate_fini(VE_SS_CHAIN, ve);
+
+	fini_venet(ve);
+
+	/* no new packets in flight beyond this point */
+	/* skb hold dst_entry, and in turn lies in the ip fragment queue */
+	ip_fragment_cleanup(ve);
+
+	fini_ve_netdev();
+	fini_ve_route(ve);
+	fini_ve_route6(ve);
+	ve_arp_fini(ve);
+	ve_ndisc_fini(ve);
+
+	/* kill iptables */
+	/* No skb belonging to VE can exist at this point as unregister_netdev
+	   is an operation awaiting until ALL skb's gone */
+	fini_ve_iptables(ve, ve->_iptables_modules);
+	fini_ve_netfilter();
+
+	ve_ipc_cleanup();
+
+	fini_ve_sched(ve);
+	clean_device_perms_ve(ve->veid);
+
+	fini_ve_devpts(ve);
+	fini_ve_shmem(ve);
+	fini_ve_sysfs(ve);
+	unregister_ve_tty_drivers(ve);	/* the drivers themselves are not freed here */
+	fini_ve_sysctl(ve);
+	fini_ve_proc(ve);
+	fini_ve_meminfo(ve);
+
+	fini_ve_mibs(ve);
+
+	(void)set_exec_env(old_ve);
+	fini_printk(ve);	/* no printk can happen in ve context anymore */
+
+	ve_list_del(ve);
+	up_read(&ve->op_sem);
+
+	real_put_ve(ve);
+}
+/* vzmond lifecycle: completion signalled on exit, flag requesting the exit. */
+static DECLARE_COMPLETION(vzmond_complete);
+static volatile int stop_vzmond;
+/* Thread body: run env_cleanup() on the VE in arg, then put module and exit. */
+static int vzmond_helper(void *arg)
+{
+	char name[18];	/* snprintf() truncates longer names */
+	struct ve_struct *ve;
+
+	ve = (struct ve_struct *)arg;
+	snprintf(name, sizeof(name), "vzmond/%d", ve->veid);
+	daemonize(name);
+	env_cleanup(ve);
+	module_put_and_exit(0);
+}
+/* Drain ve_cleanup_list, starting a kernel thread per VE to clean it up. */
+static void do_pending_env_cleanups(void)
+{
+	int err;
+	struct ve_struct *ve;
+
+	spin_lock(&ve_cleanup_lock);
+	while (1) {
+		if (list_empty(&ve_cleanup_list) || need_resched())
+			break;
+
+		ve = list_first_entry(&ve_cleanup_list,
+				struct ve_struct, cleanup_list);
+		list_del(&ve->cleanup_list);
+		spin_unlock(&ve_cleanup_lock);	/* not held across the cleanup */
+		/* this reference is put by vzmond_helper(), or below on failure */
+		__module_get(THIS_MODULE);
+		err = kernel_thread(vzmond_helper, (void *)ve, 0);
+		if (err < 0) {	/* no thread: clean up synchronously */
+			env_cleanup(ve);
+			module_put(THIS_MODULE);
+		}
+
+		spin_lock(&ve_cleanup_lock);
+	}
+	spin_unlock(&ve_cleanup_lock);
+}
+/* 1 while ve_cleanup_list holds entries, else 0; takes no lock itself. */
+static inline int have_pending_cleanups(void)
+{
+	return list_empty(&ve_cleanup_list) ? 0 : 1;
+}
+/* Cleanup daemon: sleeps until woken, then runs the pending VE cleanups. */
+static int vzmond(void *arg)
+{
+	daemonize("vzmond");
+	set_current_state(TASK_INTERRUPTIBLE);
+	/* keep going until asked to stop and no cleanup is left pending */
+	while (!stop_vzmond || have_pending_cleanups()) {
+		schedule();
+		try_to_freeze();
+		if (signal_pending(current))
+			flush_signals(current);
+
+		do_pending_env_cleanups();
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (have_pending_cleanups())	/* work is left: do not sleep */
+			__set_current_state(TASK_RUNNING);
+	}
+
+	__set_task_state(current, TASK_RUNNING);
+	complete_and_exit(&vzmond_complete, 0);
+}
+/* Spawn vzmond and remember its task; returns kernel_thread()'s value. */
+static int __init init_vzmond(void)
+{
+	int pid;
+	struct task_struct *tsk;
+
+	pid = kernel_thread(vzmond, NULL, 0);
+	if (pid > 0) {
+		tsk = find_task_by_pid_all(pid);
+		BUG_ON(tsk == NULL);
+		ve_cleanup_thread = tsk;	/* woken later by fini_vzmond() */
+	}
+	return pid;
+}
+/* Ask vzmond to stop, wake it and wait until it has completed. */
+static void fini_vzmond(void)
+{
+	stop_vzmond = 1;
+	wake_up_process(ve_cleanup_thread);
+	wait_for_completion(&vzmond_complete);
+	ve_cleanup_thread = NULL;
+	WARN_ON(!list_empty(&ve_cleanup_list));	/* vzmond should have drained it */
+}
+/* Free what ve still owns, then ve itself, and put one module reference. */
+void real_do_env_free(struct ve_struct *ve)
+{
+	VZTRACE("real_do_env_free\n");
+
+	ve_ipc_free(ve); /* free SYSV IPC resources */
+	free_ve_tty_drivers(ve);
+	free_ve_utsname(ve);
+	free_ve_sysctl(ve); /* free per ve sysctl data */
+	free_ve_filesystems(ve);
+	free_ve_cpustats(ve);
+	printk(KERN_INFO "VE: %d: stopped\n", VEID(ve));
+	kfree(ve);
+
+	module_put(THIS_MODULE);	/* pairs with __module_get() in do_env_create() */
+}
+EXPORT_SYMBOL(real_do_env_free);
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE TTY handling
+ *
+ **********************************************************************
+ **********************************************************************/
+/* Clone base into a new tty_driver owned by ve; NULL if out of memory. */
+static struct tty_driver *alloc_ve_tty_driver(struct tty_driver *base,
+					   struct ve_struct *ve)
+{
+	size_t size;
+	struct tty_driver *driver;
+
+	driver = ub_kmalloc(sizeof(struct tty_driver), GFP_KERNEL);
+	if (!driver)
+		goto out;
+
+	memcpy(driver, base, sizeof(struct tty_driver));
+
+	driver->driver_state = NULL;
+	/* one zeroed block holds ttys, termios and termios_locked, num slots each */
+	size = base->num * 3 * sizeof(void *);
+	if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM)) {
+		void **p;
+		p = kzalloc(size, GFP_KERNEL_UBC);
+		if (!p)
+			goto out_free;
+
+		driver->ttys = (struct tty_struct **)p;
+		driver->termios = (struct termios **)(p + driver->num);
+		driver->termios_locked = (struct termios **)
+			(p + driver->num * 2);
+	} else {
+		driver->ttys = NULL;
+		driver->termios = NULL;
+		driver->termios_locked = NULL;
+	}
+
+	driver->owner_env = ve;
+	driver->flags |= TTY_DRIVER_INSTALLED;
+	driver->refcount = 0;
+
+	return driver;
+
+out_free:
+	kfree(driver);
+out:
+	return NULL;
+}
+/* Release a driver made by alloc_ve_tty_driver(); NULL is accepted. */
+static void free_ve_tty_driver(struct tty_driver *driver)
+{
+	if (driver != NULL) {
+		clear_termios(driver);
+		/* ttys heads the block that also holds both termios arrays */
+		kfree(driver->ttys);
+		kfree(driver);
+	}
+}
+/* Allocate ve's pty driver pairs and its pty idr; returns 0 or -ENOMEM. */
+static int alloc_ve_tty_drivers(struct ve_struct* ve)
+{
+#ifdef CONFIG_LEGACY_PTYS
+	/* Traditional BSD devices */
+	ve->pty_driver = alloc_ve_tty_driver(pty_driver, ve);
+	if (!ve->pty_driver)
+		goto out_mem;
+
+	ve->pty_slave_driver = alloc_ve_tty_driver(pty_slave_driver, ve);
+	if (!ve->pty_slave_driver)
+		goto out_mem;
+
+	ve->pty_driver->other       = ve->pty_slave_driver;
+	ve->pty_slave_driver->other = ve->pty_driver;
+#endif
+
+#ifdef CONFIG_UNIX98_PTYS
+	ve->ptm_driver = alloc_ve_tty_driver(ptm_driver, ve);
+	if (!ve->ptm_driver)
+		goto out_mem;
+
+	ve->pts_driver = alloc_ve_tty_driver(pts_driver, ve);
+	if (!ve->pts_driver)
+		goto out_mem;
+
+	ve->ptm_driver->other = ve->pts_driver;	/* cross-link master and slave */
+	ve->pts_driver->other = ve->ptm_driver;
+
+	ve->allocated_ptys = ub_kmalloc(sizeof(*ve->allocated_ptys),
+			GFP_KERNEL);
+	if (!ve->allocated_ptys)
+		goto out_mem;
+	idr_init(ve->allocated_ptys);
+#endif
+	return 0;
+
+out_mem:
+	free_ve_tty_drivers(ve);	/* frees whatever was allocated so far */
+	return -ENOMEM;
+}
+/* Free ve's pty drivers and allocated_ptys, resetting the pointers to NULL. */
+static void free_ve_tty_drivers(struct ve_struct* ve)
+{
+#ifdef CONFIG_LEGACY_PTYS
+	free_ve_tty_driver(ve->pty_driver);	/* the helper accepts NULL */
+	free_ve_tty_driver(ve->pty_slave_driver);
+	ve->pty_driver = ve->pty_slave_driver = NULL;
+#endif
+#ifdef CONFIG_UNIX98_PTYS
+	free_ve_tty_driver(ve->ptm_driver);
+	free_ve_tty_driver(ve->pts_driver);
+	kfree(ve->allocated_ptys);
+	ve->ptm_driver = ve->pts_driver = NULL;
+	ve->allocated_ptys = NULL;
+#endif
+}
+/* Add driver to tty_drivers; the caller here holds tty_driver_guard. */
+static inline void __register_tty_driver(struct tty_driver *driver)
+{
+	list_add(&driver->tty_drivers, &tty_drivers);
+}
+/* Unlink driver from the tty_drivers list; a NULL driver is ignored. */
+static inline void __unregister_tty_driver(struct tty_driver *driver)
+{
+	/* free_ve_tty_drivers() resets the per-VE pointers to NULL */
+	if (driver != NULL)
+		list_del(&driver->tty_drivers);
+}
+/* Put ve's pty drivers on tty_drivers under tty_driver_guard; returns 0. */
+static int register_ve_tty_drivers(struct ve_struct* ve)
+{
+	write_lock_irq(&tty_driver_guard);
+#ifdef CONFIG_UNIX98_PTYS
+	__register_tty_driver(ve->ptm_driver);
+	__register_tty_driver(ve->pts_driver);
+#endif
+#ifdef CONFIG_LEGACY_PTYS
+	__register_tty_driver(ve->pty_driver);
+	__register_tty_driver(ve->pty_slave_driver);
+#endif
+	write_unlock_irq(&tty_driver_guard);
+
+	return 0;	/* no failure path exists in this function */
+}
+/* Take ve's pty drivers off tty_drivers under tty_driver_guard. */
+static void unregister_ve_tty_drivers(struct ve_struct* ve)
+{
+	VZTRACE("unregister_ve_tty_drivers\n");
+
+	write_lock_irq(&tty_driver_guard);
+#ifdef CONFIG_LEGACY_PTYS
+	__unregister_tty_driver(ve->pty_driver);	/* NULL is skipped */
+	__unregister_tty_driver(ve->pty_slave_driver);
+#endif
+#ifdef CONFIG_UNIX98_PTYS
+	__unregister_tty_driver(ve->ptm_driver);
+	__unregister_tty_driver(ve->pts_driver);
+#endif
+	write_unlock_irq(&tty_driver_guard);
+}
+/* Allocate, then register, ve's pty drivers; 0 or the failing step's error. */
+static int init_ve_tty_drivers(struct ve_struct *ve)
+{
+	int ret;
+
+	ret = alloc_ve_tty_drivers(ve);
+	if (ret != 0)
+		return ret;
+	ret = register_ve_tty_drivers(ve);
+	if (ret != 0) {
+		/* registration failed: give the allocations back */
+		free_ve_tty_drivers(ve);
+		return ret;
+	}
+	return 0;
+}
+/* Unregister, then free, ve's pty drivers. */
+static void fini_ve_tty_drivers(struct ve_struct *ve)
+{
+	unregister_ve_tty_drivers(ve);
+	free_ve_tty_drivers(ve);
+}
+
+/*
+ * Free every termios and termios_locked entry of the driver and reset
+ * the slots to NULL, so a driver being discarded leaks no per-line
+ * settings.  Does nothing when the driver has no termios array.
+ */
+static void clear_termios(struct tty_driver *driver)
+{
+	struct termios *stale;
+	int idx;
+
+	if (!driver->termios)
+		return;
+
+	for (idx = 0; idx < driver->num; idx++) {
+		/* detach each slot before freeing what it pointed at */
+		stale = driver->termios[idx];
+		driver->termios[idx] = NULL;
+		kfree(stale);
+
+		/* same treatment for the locked copy */
+		stale = driver->termios_locked[idx];
+		driver->termios_locked[idx] = NULL;
+		kfree(stale);
+	}
+}
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * Pieces of VE network
+ *
+ **********************************************************************
+ **********************************************************************/
+
+#ifdef CONFIG_NET
+#include <asm/uaccess.h>
+#include <net/sock.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+#endif
+
+#ifdef CONFIG_INET
+static void ve_del_ip_addrs(struct net_device *dev)
+{
+	struct in_device *in_dev;
+	/* Delete every address on dev's ifa_list; no-op without an in_device. */
+	in_dev = in_dev_get(dev);
+	if (in_dev == NULL)
+		return;
+	/* keep deleting through the list head until ifa_list is empty */
+	while (in_dev->ifa_list != NULL) {
+		inet_del_ifa(in_dev, &in_dev->ifa_list, 1);
+	}
+	in_dev_put(in_dev);
+}
+/* Hand dev to addrconf_ifdown(); compiles to nothing without IPv6. */
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static void ve_del_ipv6_addrs(struct net_device *dev)
+{
+	addrconf_ifdown(dev, 2);
+}
+#else	/* no IPv6 support configured */
+#define ve_del_ipv6_addrs(dev) do { } while (0)
+#endif
+/* Strip dev's addresses, close it if up and shut it down before a move. */
+static int ve_netdev_cleanup(struct net_device *dev, int to_ve)
+{
+	int err;	/* dev_close() result, or 0 if dev was not up */
+
+	err = 0;	/* note: to_ve is not used in this function */
+	ve_del_ip_addrs(dev);
+	ve_del_ipv6_addrs(dev);
+	if ((dev->flags & IFF_UP) != 0)
+		err = dev_close(dev);
+	synchronize_net();
+	dev_shutdown(dev);
+	dev_mc_discard(dev);
+	free_divert_blk(dev);
+	synchronize_net();
+	return err;
+}
+/* Move dev from ve_src's device list and hashes to ve_dst's; notes inside. */
+static void __ve_dev_move(struct net_device *dev, struct ve_struct *ve_src,
+	struct ve_struct *ve_dst, struct user_beancounter *exec_ub)
+{
+	struct net_device **dp, *d;
+	struct user_beancounter *ub;
+	struct ve_struct *exec_ve;
+	/* dp always addresses the link that points at d, list head included */
+	for (d = ve_src->_net_dev_base, dp = &ve_src->_net_dev_base; d != NULL;
+	     dp = &d->next, d = d->next) {
+		if (d == dev) {
+			hlist_del(&dev->name_hlist);
+			hlist_del(&dev->index_hlist);
+			if (ve_src->_net_dev_tail == &dev->next)
+				ve_src->_net_dev_tail = dp;
+			/* unlink; also correct when dev is the first entry */
+			*dp = dev->next;
+			dev->next = NULL;
+			break;
+		}
+	}
+	*ve_dst->_net_dev_tail = dev;	/* append to the destination list */
+	ve_dst->_net_dev_tail = &dev->next;
+	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, ve_dst));
+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, ve_dst));
+	dev->owner_env = ve_dst;
+	/* re-account the device to exec_ub, releasing the previous beancounter */
+	ub = netdev_bc(dev)->exec_ub;
+	netdev_bc(dev)->exec_ub = get_beancounter(exec_ub);
+	put_beancounter(ub);
+	/* dev_base_lock (taken by the caller) is dropped while notifiers run */
+	write_unlock_bh(&dev_base_lock);
+
+	exec_ve = set_exec_env(ve_src);
+	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+	(void)set_exec_env(ve_dst);
+	call_netdevice_notifiers(NETDEV_REGISTER, dev);
+	(void)set_exec_env(exec_ve);
+
+	write_lock_bh(&dev_base_lock);	/* returns with the lock held again */
+}
+/* Move ve0's device dev_name into VE veid; returns 0 or a -errno. */
+static int ve_dev_add(envid_t veid, char *dev_name)
+{
+	int err;
+	struct net_device *dev;
+	struct ve_struct *ve;
+	struct hlist_node *p;
+	struct hlist_head *head;
+
+	dev = NULL;
+	err = -ESRCH;	/* no such VE, or no such device */
+
+	ve = get_ve_by_id(veid);
+	if (ve == NULL)
+		goto out;
+
+	rtnl_lock();
+	/* find the device by name among ve0's devices */
+	read_lock(&dev_base_lock);
+	hlist_for_each(p, dev_name_hash(dev_name, get_ve0())) {
+		struct net_device *d = hlist_entry(p, struct net_device,
+						   name_hlist);
+		if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
+			dev = d;
+			break;
+		}
+	}
+	read_unlock(&dev_base_lock);
+	if (dev == NULL)
+		goto out_unlock;
+
+	err = -EPERM;
+	if (!ve_is_dev_movable(dev))
+		goto out_unlock;
+
+	err = -EINVAL;
+	if (dev->flags & (IFF_SLAVE|IFF_MASTER))
+		goto out_unlock;
+
+	/* Check for existence of name */
+	head = dev_name_hash(dev->name, ve);
+	hlist_for_each(p, head) {
+		struct net_device *d
+			= hlist_entry(p, struct net_device, name_hlist);
+		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
+			err = -EEXIST;
+ 			goto out_unlock;
+		}
+ 	}
+	/* the cleanup result is ignored; the move happens regardless */
+	ve_netdev_cleanup(dev, 1);
+
+	write_lock_bh(&dev_base_lock);
+	__ve_dev_move(dev, get_ve0(), ve, get_exec_ub());
+	write_unlock_bh(&dev_base_lock);
+
+	err = 0;
+
+out_unlock:
+	rtnl_unlock();
+	real_put_ve(ve);
+
+	if (dev == NULL)
+		printk(KERN_WARNING "Device %s not found\n", dev_name);
+
+out:
+	return err;
+}
+/* Move device dev_name of VE veid back to ve0; returns 0 or a -errno. */
+static int ve_dev_del(envid_t veid, char *dev_name)
+{
+	int err;
+	struct net_device *dev;
+	struct ve_struct *ve, *old_exec;
+	struct hlist_node *p;
+
+	dev = NULL;
+	err = -ESRCH;	/* no such VE, or no such device */
+
+	ve = get_ve_by_id(veid);
+	if (ve == NULL)
+		goto out;
+
+	rtnl_lock();
+	/* find the device by name among the VE's own devices */
+	read_lock(&dev_base_lock);
+	hlist_for_each(p, dev_name_hash(dev_name, ve)) {
+		struct net_device *d = hlist_entry(p, struct net_device,
+						   name_hlist);
+		if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
+			dev = d;
+			break;
+		}
+	}
+	read_unlock(&dev_base_lock);
+	if (dev == NULL)
+		goto out_unlock;
+
+	err = -EPERM;
+	if (!ve_is_dev_movable(dev))
+		goto out_unlock;
+	/* the cleanup runs with the VE as exec env; its result is ignored */
+	old_exec = set_exec_env(ve);
+	ve_netdev_cleanup(dev, 0);
+	(void)set_exec_env(old_exec);
+
+	write_lock_bh(&dev_base_lock);
+	__ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
+	write_unlock_bh(&dev_base_lock);
+
+	err = 0;
+
+out_unlock:
+	rtnl_unlock();
+	real_put_ve(ve);
+
+	if (dev == NULL)
+		printk(KERN_WARNING "Device %s not found\n", dev_name);
+
+out:
+	return err;
+}
+/* Map a ve0 net device into VE veid or take it back; needs CAP_SETVEID. */
+int real_ve_dev_map(envid_t veid, int op, char *dev_name)
+{
+	int ret = -EINVAL;	/* result for an unrecognised op */
+
+	/*
+	 * Only a caller holding CAP_SETVEID may move devices between
+	 * environments; anyone else is refused with -EPERM.
+	 */
+	if (!capable(CAP_SETVEID))
+		return -EPERM;
+
+	switch (op) {
+	case VE_NETDEV_ADD:
+		ret = ve_dev_add(veid, dev_name);
+		break;
+	case VE_NETDEV_DEL:
+		ret = ve_dev_del(veid, dev_name);
+		break;
+	}
+	return ret;
+}
+/* Move every device of ve except venet and loopback ones back to ve0. */
+static void ve_mapped_devs_cleanup(struct ve_struct *ve)
+{
+	struct net_device *dev;
+
+	rtnl_lock();
+	write_lock_bh(&dev_base_lock);
+restart:
+	for (dev = ve->_net_dev_base; dev != NULL; dev = dev->next)
+	{
+		if ((dev->features & NETIF_F_VENET) ||
+		    (dev == ve->_loopback_dev)) /* Skip loopback dev */
+			continue;
+		write_unlock_bh(&dev_base_lock);
+		ve_netdev_cleanup(dev, 0);
+		write_lock_bh(&dev_base_lock);
+		__ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
+		goto restart;	/* the lock was dropped: rescan from the head */
+	}
+	write_unlock_bh(&dev_base_lock);
+	rtnl_unlock();
+}
+#endif
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * VE information via /proc
+ *
+ **********************************************************************
+ **********************************************************************/
+#ifdef CONFIG_PROC_FS
+#if BITS_PER_LONG == 32
+#define VESTAT_LINE_WIDTH (6 * 11 + 6 * 21)
+#define VESTAT_LINE_FMT "%10u %10lu %10lu %10lu %10Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %10lu\n"
+#define VESTAT_HEAD_FMT "%10s %10s %10s %10s %10s %20s %20s %20s %20s %20s %20s %10s\n"
+#else
+#define VESTAT_LINE_WIDTH (12 * 21)
+#define VESTAT_LINE_FMT "%20u %20lu %20lu %20lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20lu\n"
+#define VESTAT_HEAD_FMT "%20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s\n"
+#endif
+/* seq_file show: print the header when due, then one VE's statistics. */
+static int vestat_seq_show(struct seq_file *m, void *v)
+{
+	struct list_head *entry;
+	struct ve_struct *ve;
+	struct ve_struct *curve;
+	int cpu;
+	unsigned long user_ve, nice_ve, system_ve;
+	unsigned long long uptime;
+	cycles_t uptime_cycles, idle_time, strv_time, used;
+
+	entry = (struct list_head *)v;
+	ve = list_entry(entry, struct ve_struct, ve_list);
+
+	curve = get_exec_env();
+	if (entry == ve_list_head.next ||
+	    (!ve_is_super(curve) && ve == curve)) {
+		/* print header */
+		seq_printf(m, "%-*s\n",
+			VESTAT_LINE_WIDTH - 1,
+			"Version: 2.2");
+		seq_printf(m, VESTAT_HEAD_FMT, "VEID",
+					"user", "nice", "system",
+					"uptime", "idle",
+					"strv", "uptime", "used",
+					"maxlat", "totlat", "numsched");
+	}
+	/* VE0 itself never gets a statistics line */
+	if (ve == get_ve0())
+		return 0;
+
+	user_ve = nice_ve = system_ve = 0;
+	idle_time = strv_time = used = 0;	/* strv_time stays 0 below */
+	/* sum the per-CPU counters over all online CPUs */
+	for_each_online_cpu(cpu) {
+		struct ve_cpu_stats *st;
+
+		st = VE_CPU_STATS(ve, cpu);
+		user_ve += st->user;
+		nice_ve += st->nice;
+		system_ve += st->system;
+		used += st->used_time;
+		idle_time += __ve_sched_get_idle_time(ve, cpu);
+	}
+	uptime_cycles = get_cycles() - ve->start_cycles;
+	uptime = get_jiffies_64() - ve->start_jiffies;
+
+	seq_printf(m, VESTAT_LINE_FMT, ve->veid,
+				user_ve, nice_ve, system_ve,
+				(unsigned long long)uptime,
+				(unsigned long long)idle_time, 
+				(unsigned long long)strv_time,
+				(unsigned long long)uptime_cycles,
+				(unsigned long long)used,
+				(unsigned long long)ve->sched_lat_ve.last.maxlat,
+				(unsigned long long)ve->sched_lat_ve.last.totlat,
+				ve->sched_lat_ve.last.count);
+	return 0;
+}
+/* seq_file start: take ve_list_lock and return the list node for *pos. */
+static void *ve_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct ve_struct *curve;
+	struct list_head *entry;
+	loff_t l;
+
+	curve = get_exec_env();
+	read_lock(&ve_list_lock);	/* released again in ve_seq_stop() */
+	if (!ve_is_super(curve)) {	/* non-super VE: only its own node */
+		if (*pos != 0)
+			return NULL;
+		return &curve->ve_list;	/* ->show() expects a list_head */
+	}
+
+	l = *pos;
+	list_for_each(entry, &ve_list_head) {
+		if (l == 0)
+			return entry;
+		l--;
+	}
+	return NULL;
+}
+/* seq_file next: step to the following node, NULL at the list end. */
+static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct list_head *entry;
+
+	entry = (struct list_head *)v;
+	if (!ve_is_super(get_exec_env()))
+		return NULL;	/* a non-super VE iterates over one record */
+	(*pos)++;
+	return entry->next == &ve_list_head ? NULL : entry->next;
+}
+/* seq_file stop: release the read lock taken in ve_seq_start(). */
+static void ve_seq_stop(struct seq_file *m, void *v)
+{
+	read_unlock(&ve_list_lock);
+}
+
+static struct seq_operations vestat_seq_op = {
+        .start	= ve_seq_start,
+        .next	= ve_seq_next,
+        .stop	= ve_seq_stop,
+        .show	= vestat_seq_show
+};
+
+static int vestat_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &vestat_seq_op);
+}
+
+static struct file_operations proc_vestat_operations = {
+        .open	 = vestat_open,
+        .read	 = seq_read,
+        .llseek	 = seq_lseek,
+        .release = seq_release
+};
+/* ub's held UB_OOMGUARPAGES if glob_ve_meminfo is set, else UB_PRIVVMPAGES. */
+static inline unsigned long ve_used_mem(struct user_beancounter *ub)
+{
+	extern int glob_ve_meminfo;
+	return glob_ve_meminfo ? ub->ub_parms[UB_OOMGUARPAGES].held :
+				 ub->ub_parms[UB_PRIVVMPAGES].held ;
+}
+/* Overwrite *mi with totals derived from the exec env's meminfo_val. */
+static inline void ve_mi_replace(struct meminfo *mi)
+{
+#ifdef CONFIG_USER_RESOURCE
+	struct user_beancounter *ub;
+	unsigned long meminfo_val;
+	unsigned long nodettram;
+	unsigned long usedmem;
+
+	meminfo_val = get_exec_env()->meminfo_val;
+
+	if(!meminfo_val)
+		return; /* No virtualization */
+	/* save what is needed from *mi before it is zeroed below */
+	nodettram = mi->si.totalram;
+	ub = current->mm->mm_ub;	/* NOTE(review): assumes current->mm != NULL */
+	usedmem = ve_used_mem(ub);
+
+	memset(mi, 0, sizeof(*mi));
+	/* total is capped at the original total; free never goes below 0 */
+	mi->si.totalram = (meminfo_val > nodettram) ?
+			nodettram : meminfo_val;
+	mi->si.freeram = (mi->si.totalram > usedmem) ?
+			(mi->si.totalram - usedmem) : 0;
+#else
+	return;
+#endif
+}
+/* virtinfo notifier: act on VIRTINFO_MEMINFO, pass old_ret on otherwise. */
+static int meminfo_call(struct vnotifier_block *self,
+                unsigned long event, void *arg, int old_ret)
+{
+	if (event != VIRTINFO_MEMINFO)
+		return old_ret;
+
+	ve_mi_replace((struct meminfo *)arg);
+
+	return NOTIFY_OK;
+}
+
+
+static struct vnotifier_block meminfo_notifier_block = {
+	.notifier_call = meminfo_call
+};
+/* Create the vz/vestat and vz/devperms proc files; register meminfo hook. */
+static int __init init_vecalls_proc(void)
+{
+	struct proc_dir_entry *de;
+
+	de = create_proc_glob_entry_mod("vz/vestat",
+			S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	if (de == NULL) {
+		/* create "vz" subdirectory, if not exist */
+		(void) create_proc_glob_entry("vz",
+					      S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+		de = create_proc_glob_entry_mod("vz/vestat",
+				S_IFREG|S_IRUSR, NULL, THIS_MODULE);
+	}
+	if (de)
+		de->proc_fops = &proc_vestat_operations;
+	else
+		printk(KERN_WARNING
+				"VZMON: can't make vestat proc entry\n");
+	/* a missing proc file is only warned about; 0 is returned regardless */
+	de = create_proc_entry_mod("vz/devperms", S_IFREG | S_IRUSR, NULL,
+				THIS_MODULE);
+	if (de)
+		de->proc_fops = &proc_devperms_ops;
+	else
+		printk(KERN_WARNING
+				"VZMON: can't make devperms proc entry\n");
+
+	virtinfo_notifier_register(VITYPE_GENERAL, &meminfo_notifier_block);
+
+	return 0;
+}
+/* Remove both proc files and unregister the meminfo notifier. */
+static void fini_vecalls_proc(void)
+{
+	remove_proc_entry("vz/devperms", NULL);
+	remove_proc_entry("vz/vestat", NULL);
+	virtinfo_notifier_unregister(VITYPE_GENERAL, &meminfo_notifier_block);
+}
+#else
+#define init_vecalls_proc()	(0)
+#define fini_vecalls_proc()	do { } while (0)
+#endif /* CONFIG_PROC_FS */
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * User ctl
+ *
+ **********************************************************************
+ **********************************************************************/
+/* Native dispatcher for the VZCTL_* ioctls; unknown cmd gives -ENOTTY. */
+int vzcalls_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int err;
+
+	err = -ENOTTY;
+	switch(cmd) {
+	    case VZCTL_MARK_ENV_TO_DOWN: {
+		        /* Compatibility issue */
+		        err = 0;
+		}
+		break;
+	    case VZCTL_SETDEVPERMS: {
+			/* Device type was mistakenly declared as dev_t
+			 * in the old user-kernel interface.
+			 * That's wrong, dev_t is a kernel internal type.
+			 * I use `unsigned' not having anything better in mind.
+			 * 2001/08/11  SAW  */
+			struct vzctl_setdevperms s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err = real_setdevperms(s.veid, s.type,
+					new_decode_dev(s.dev), s.mask);
+		}
+		break;
+#ifdef CONFIG_INET
+	    case VZCTL_VE_NETDEV: {
+			struct vzctl_ve_netdev d;
+			char *s;
+			err = -EFAULT;
+			if (copy_from_user(&d, (void __user *)arg, sizeof(d)))
+				break;
+			err = -ENOMEM;
+			s = kmalloc(IFNAMSIZ+1, GFP_KERNEL);
+			if (s == NULL)
+				break;
+			err = -EFAULT;	/* also returned for an empty name */
+			if (strncpy_from_user(s, d.dev_name, IFNAMSIZ) > 0) {
+				s[IFNAMSIZ] = 0;	/* terminate a full-length name */
+				err = real_ve_dev_map(d.veid, d.op, s);
+			}
+			kfree(s);
+		}
+		break;
+#endif
+	    case VZCTL_ENV_CREATE: {
+			struct vzctl_env_create s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err = real_env_create(s.veid, s.flags, s.class_id,
+				NULL, 0);
+		}
+		break;
+	    case VZCTL_ENV_CREATE_DATA: {
+			struct vzctl_env_create_data s;
+			env_create_param_t *data;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err=-EINVAL;
+			if (s.datalen < VZCTL_ENV_CREATE_DATA_MINLEN ||
+			    s.datalen > VZCTL_ENV_CREATE_DATA_MAXLEN ||
+			    s.data == 0)
+				break;
+			err = -ENOMEM;
+			data = kzalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				break;
+			/* zeroed above, so fields beyond s.datalen stay 0 */
+			err = -EFAULT;
+			if (copy_from_user(data, (void __user *)s.data,
+						s.datalen))
+				goto free_data;
+			err = real_env_create(s.veid, s.flags, s.class_id,
+				data, s.datalen);
+free_data:
+			kfree(data);
+		}
+		break;
+	    case VZCTL_GET_CPU_STAT: {
+			struct vzctl_cpustatctl s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err = ve_get_cpu_stat(s.veid, s.cpustat);
+		}
+		break;
+	    case VZCTL_VE_MEMINFO: {
+			struct vzctl_ve_meminfo s;
+			err = -EFAULT;
+			if (copy_from_user(&s, (void __user *)arg, sizeof(s)))
+				break;
+			err = ve_set_meminfo(s.veid, s.val);
+		}
+		break;
+	}
+	return err;
+}
+/* 32-bit compat ioctls: translate compat layouts, else use the native path. */
+#ifdef CONFIG_COMPAT
+int compat_vzcalls_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	int err;
+
+	switch(cmd) {
+	case VZCTL_GET_CPU_STAT:
+		err = -ENOTTY;	/* FIXME: no compat layout; must not fall through */
+		break;
+	case VZCTL_COMPAT_ENV_CREATE_DATA: {
+		struct compat_vzctl_env_create_data cs;
+		struct vzctl_env_create_data __user *s;
+
+		s = compat_alloc_user_space(sizeof(*s));
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void *)arg, sizeof(cs)))
+			break;
+
+		if (put_user(cs.veid, &s->veid) ||
+		    put_user(cs.flags, &s->flags) ||
+		    put_user(cs.class_id, &s->class_id) ||
+		    put_user(compat_ptr(cs.data), &s->data) ||
+		    put_user(cs.datalen, &s->datalen))
+			break;
+		err = vzcalls_ioctl(file, VZCTL_ENV_CREATE_DATA,
+						(unsigned long)s);
+		break;
+	}
+#ifdef CONFIG_NET
+	case VZCTL_COMPAT_VE_NETDEV: {
+		struct compat_vzctl_ve_netdev cs;
+		struct vzctl_ve_netdev __user *s;
+
+		s = compat_alloc_user_space(sizeof(*s));
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void *)arg, sizeof(cs)))
+			break;
+
+		if (put_user(cs.veid, &s->veid) ||
+		    put_user(cs.op, &s->op) ||
+		    put_user(compat_ptr(cs.dev_name), &s->dev_name))
+			break;
+		err = vzcalls_ioctl(file, VZCTL_VE_NETDEV, (unsigned long)s);
+		break;
+	}
+#endif
+	case VZCTL_COMPAT_VE_MEMINFO: {
+		struct compat_vzctl_ve_meminfo cs;
+		err = -EFAULT;
+		if (copy_from_user(&cs, (void *)arg, sizeof(cs)))
+			break;
+		err = ve_set_meminfo(cs.veid, cs.val);
+		break;
+	}
+	default:
+		err = vzcalls_ioctl(file, cmd, arg);
+		break;
+	}
+	return err;
+}
+#endif
+
+static struct vzioctlinfo vzcalls = {
+	.type		= VZCTLTYPE,
+	.ioctl		= vzcalls_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= compat_vzcalls_ioctl,
+#endif
+	.owner		= THIS_MODULE,
+};
+
+
+/**********************************************************************
+ **********************************************************************
+ *
+ * Init/exit stuff
+ *
+ **********************************************************************
+ **********************************************************************/
+
+static int __init init_vecalls_symbols(void)
+{
+	KSYMRESOLVE(real_do_env_free);
+	KSYMMODRESOLVE(vzmon);
+	return 0;
+}
+EXPORT_SYMBOL(real_env_create);
+
+static void fini_vecalls_symbols(void)
+{
+	KSYMMODUNRESOLVE(vzmon);
+	KSYMUNRESOLVE(real_do_env_free);
+}
+
+static inline __init int init_vecalls_ioctls(void)
+{
+	vzioctl_register(&vzcalls);
+	return 0;
+}
+
+static inline void fini_vecalls_ioctls(void)
+{
+	vzioctl_unregister(&vzcalls);
+}
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *table_header;
+
+static ctl_table kernel_table[] = {
+	{
+		.ctl_name	= KERN_VE_ALLOW_KTHREADS,
+		.procname	= "ve_allow_kthreads",
+		.data		= &ve_allow_kthreads,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ 0 }
+};
+
+static ctl_table root_table[] =  {
+	{CTL_KERN, "kernel",  NULL, 0, 0555, kernel_table},
+	{ 0 }
+};
+
+static int init_vecalls_sysctl(void)
+{
+	table_header = register_sysctl_table(root_table, 0);
+	if (!table_header)
+		return -ENOMEM ;
+	return 0;
+}
+
+static void fini_vecalls_sysctl(void)
+{
+	unregister_sysctl_table(table_header);
+} 
+#else
+static int init_vecalls_sysctl(void) { return 0; }
+static void fini_vecalls_sysctl(void) { ; }
+#endif
+/* Module load: bring up sysctl, vzmond, symbols, proc files and ioctls. */
+static int __init vecalls_init(void)
+{
+	int err;
+
+	err = init_vecalls_sysctl();
+	if (err)
+		goto out_vzmond;
+
+	init_rwsem(&get_ve0()->op_sem);
+
+	err = init_vzmond();
+	if (err < 0)
+		goto out_sysctl;
+
+	err = init_vecalls_symbols();
+	if (err < 0)
+		goto out_sym;
+
+	err = init_vecalls_proc();
+	if (err < 0)
+		goto out_proc;
+
+	err = init_vecalls_ioctls();
+	if (err < 0)
+		goto out_ioctls;
+
+	return 0;
+	/* each label undoes the steps that succeeded before the failing one */
+out_ioctls:
+	fini_vecalls_proc();
+out_proc:
+	fini_vecalls_symbols();
+out_sym:
+	fini_vzmond();
+out_sysctl:
+	fini_vecalls_sysctl();
+out_vzmond:
+	return err;
+}
+/* Module unload: tear down in the reverse order of vecalls_init(). */
+static void vecalls_exit(void)
+{
+	fini_vecalls_ioctls();
+	fini_vecalls_proc();
+	fini_vecalls_symbols();
+	fini_vzmond();
+	fini_vecalls_sysctl();
+}
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Control");
+MODULE_LICENSE("GPL v2");
+
+module_init(vecalls_init)
+module_exit(vecalls_exit)
diff -upr linux-2.6.16.46-0.12.orig/kernel/ve/veowner.c linux-2.6.16.46-0.12-027test011/kernel/ve/veowner.c
--- linux-2.6.16.46-0.12.orig/kernel/ve/veowner.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ve/veowner.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,291 @@
+/*
+ *  kernel/veowner.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/ipc.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/inetdevice.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#include <net/tcp.h>
+/* Initialise tsk's VE bookkeeping so that it runs in and belongs to VE0. */
+void prepare_ve0_process(struct task_struct *tsk)
+{
+	set_virt_pid(tsk, tsk->pid);
+	set_virt_tgid(tsk, tsk->tgid);
+	if (tsk->signal) {
+		set_virt_pgid(tsk, tsk->signal->pgrp);
+		set_virt_sid(tsk, tsk->signal->session);
+	}
+	VE_TASK_INFO(tsk)->exec_env = get_ve0();
+	VE_TASK_INFO(tsk)->owner_env = get_ve0();
+	VE_TASK_INFO(tsk)->sleep_time = 0;
+	VE_TASK_INFO(tsk)->wakeup_stamp = 0;
+	VE_TASK_INFO(tsk)->sched_time = 0;
+	seqcount_init(&VE_TASK_INFO(tsk)->wakeup_lock);
+	/* only tasks with a non-zero pid are linked and counted in VE0 */
+	if (tsk->pid) {
+		SET_VE_LINKS(tsk);
+		atomic_inc(&get_ve0()->pcounter);
+	}
+}
+
+#ifdef CONFIG_NET
+void prepare_ve0_loopback(void)
+{
+	get_ve0()->_loopback_dev = &loopback_dev;
+}
+#endif
+
+/*
+ * ------------------------------------------------------------------------
+ * proc entries
+ * ------------------------------------------------------------------------
+ */
+
+#ifdef CONFIG_PROC_FS
+static void proc_move(struct proc_dir_entry *ddir,
+		struct proc_dir_entry *sdir,
+		const char *name)
+{
+	struct proc_dir_entry **p, *q;
+	int len;
+	/* Unlink entry "name" from sdir and push it onto the head of ddir. */
+	len = strlen(name);
+	for (p = &sdir->subdir, q = *p; q != NULL; p = &q->next, q = *p)
+		if (proc_match(len, name, q))
+			break;
+	if (q == NULL)
+		return;	/* no such entry in sdir: nothing to do */
+	*p = q->next;	/* p is the link that pointed at q */
+	q->parent = ddir;
+	q->next = ddir->subdir;
+	ddir->subdir = q;
+}
+static void prepare_proc_misc(void)
+{
+	static char *table[] = {
+		"loadavg",
+		"uptime",
+		"meminfo",
+		"version",
+		"stat",
+		"filesystems",
+		"locks",
+		"swaps",
+		"mounts",
+		"net",
+		"cpuinfo",
+		"sysvipc",
+		"sys",
+		"fs",
+		"vz",
+		"cmdline",
+		"vmstat",
+		"modules",
+		NULL,
+	};
+	char **p;
+
+	for (p = table; *p != NULL; p++)
+		proc_move(&proc_root, ve0.proc_root, *p);
+}
+int prepare_proc(void)
+{
+	struct ve_struct *envid;
+	struct proc_dir_entry *de;
+	struct proc_dir_entry *ve_root;
+	/* Make the existing /proc tree VE0-only, then re-export some entries. */
+	envid = set_exec_env(&ve0);
+	ve_root = ve0.proc_root->subdir;
+	/* move the whole tree to be visible in VE0 only */
+	ve0.proc_root->subdir = proc_root.subdir;
+	for (de = ve0.proc_root->subdir; de->next != NULL; de = de->next)
+		de->parent = ve0.proc_root;
+	de->parent = ve0.proc_root;	/* last entry: the loop stops on it */
+	de->next = ve_root;		/* append VE0's previous children */
+	/* NOTE(review): the loop above assumes proc_root.subdir is not NULL */
+	/* move back into the global scope some specific entries */
+	proc_root.subdir = NULL;
+	prepare_proc_misc();
+	proc_net = proc_mkdir("net", ve0.proc_root);
+	proc_net_stat = proc_mkdir("stat", proc_net);
+	proc_mkdir("vz", NULL);
+#ifdef CONFIG_SYSVIPC
+	proc_mkdir("sysvipc", NULL);
+#endif
+	proc_root_fs = proc_mkdir("fs", NULL);
+	/* XXX proc_tty_init(); */
+
+	/* XXX process inodes */
+
+	(void)set_exec_env(envid);
+
+	(void)create_proc_glob_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
+	return 0;
+}
+
+static struct proc_dir_entry ve0_proc_root = {
+	.name = "/proc",
+	.namelen = 5,
+	.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+	.nlink = 2
+};
+
+void prepare_ve0_proc_root(void)
+{
+	ve0.proc_root = &ve0_proc_root;
+}
+#endif
+
+/*
+ * ------------------------------------------------------------------------
+ * Virtualized sysctl
+ * ------------------------------------------------------------------------
+ */
+
+static int semmin[4] = { 1, 1, 1, 1 };
+static int semmax[4] = { 8000, INT_MAX, 1000, IPCMNI };
+static ctl_table kern_table[] = {
+	{KERN_NODENAME, "hostname", system_utsname.nodename, 64,
+	 0644, NULL, &proc_doutsstring, &sysctl_string},
+	{KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64,
+	 0644, NULL, &proc_doutsstring, &sysctl_string},
+#ifdef CONFIG_SYSVIPC
+#define get_ve0_field(fname) &ve0._##fname
+	{KERN_SHMMAX, "shmmax", get_ve0_field(shm_ctlmax), sizeof (size_t),
+	 0644, NULL, &proc_doulongvec_minmax },
+	{KERN_SHMALL, "shmall", get_ve0_field(shm_ctlall), sizeof (size_t),
+	 0644, NULL, &proc_doulongvec_minmax },
+	{KERN_SHMMNI, "shmmni", get_ve0_field(shm_ctlmni), sizeof (int),
+	 0644, NULL, &proc_dointvec_minmax, NULL,
+	 NULL, &semmin[0], &semmax[3] },
+	{KERN_MSGMAX, "msgmax", get_ve0_field(msg_ctlmax), sizeof (int),
+	 0644, NULL, &proc_dointvec },
+	{KERN_MSGMNI, "msgmni", get_ve0_field(msg_ctlmni), sizeof (int),
+	 0644, NULL, &proc_dointvec_minmax, NULL,
+	 NULL, &semmin[0], &semmax[3] },
+	{KERN_MSGMNB, "msgmnb", get_ve0_field(msg_ctlmnb), sizeof (int),
+	 0644, NULL, &proc_dointvec },
+	{KERN_SEM, "sem", get_ve0_field(sem_ctls), 4*sizeof (int),
+	 0644, NULL, &proc_dointvec },
+#endif
+	{0}
+};
+static ctl_table root_table[] = {
+	{CTL_KERN, "kernel", NULL, 0, 0555, kern_table},
+	{0}
+};
+extern int ve_area_access_check;
+#ifdef CONFIG_INET
+static ctl_table vz_ipv4_route_table[] = {
+	{
+		.ctl_name	= NET_IPV4_ROUTE_SRC_CHECK,
+		.procname	= "src_check",
+		.data		= &ip_rt_src_check,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ 0 }
+};
+static ctl_table vz_ipv4_table[] = {
+	{NET_IPV4_ROUTE, "route", NULL, 0, 0555, vz_ipv4_route_table},
+	{ 0 }
+};
+static ctl_table vz_net_table[] = {
+	{NET_IPV4,   "ipv4",      NULL, 0, 0555, vz_ipv4_table},
+	{ 0 }
+};
+#endif
+static ctl_table vz_fs_table[] = {
+	{
+		.ctl_name	= 226,
+		.procname	= "ve-area-access-check",
+		.data		= &ve_area_access_check,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ 0 }
+};
+static ctl_table root_table2[] = {
+#ifdef CONFIG_INET
+	{CTL_NET, "net", NULL, 0, 0555, vz_net_table},
+#endif
+	{CTL_FS, "fs", NULL, 0, 0555, vz_fs_table},
+	{ 0 }
+};
+int prepare_sysctl(void)
+{
+	struct ve_struct *envid;
+	/* Register both tables with VE0 as exec env; results go unchecked. */
+	envid = set_exec_env(&ve0);
+	ve0.kern_header = register_sysctl_table(root_table, 1);
+	register_sysctl_table(root_table2, 0);
+	(void)set_exec_env(envid);
+	return 0;
+}
+
+void prepare_ve0_sysctl(void)
+{
+	INIT_LIST_HEAD(&ve0.sysctl_lh);
+#ifdef CONFIG_SYSCTL
+	ve0.proc_sys_root = proc_mkdir("sys", NULL);
+#endif
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * XXX init_ve_system
+ * ------------------------------------------------------------------------
+ */
+/* Fill in VE0's fields from the running system, then run the prepares. */
+void init_ve_system(void)
+{
+	struct task_struct *init_entry;
+	struct ve_struct *ve;
+
+	ve = get_ve0();
+
+	init_entry = child_reaper;
+	ve->init_entry = init_entry;
+	/* if ve_move_task to VE0 (e.g. in cpt code)	*
+	 * occurs, ve_cap_bset on VE0 is required	*/
+	ve->ve_cap_bset = CAP_INIT_EFF_SET;
+
+#ifdef CONFIG_INET
+	ve->_ipv4_devconf = &ipv4_devconf;
+	ve->_ipv4_devconf_dflt = &ipv4_devconf_dflt;
+#endif
+	/* VE0's root and root mount are copied from init_entry's fs struct */
+	read_lock(&init_entry->fs->lock);
+	ve->fs_rootmnt = init_entry->fs->rootmnt;
+	ve->fs_root = init_entry->fs->root;
+	read_unlock(&init_entry->fs->lock);
+
+	/* common prepares */
+#ifdef CONFIG_PROC_FS
+	prepare_proc();
+#endif
+	prepare_sysctl();
+	prepare_ipc();
+}
diff -upr linux-2.6.16.46-0.12.orig/kernel/ve/vzdev.c linux-2.6.16.46-0.12-027test011/kernel/ve/vzdev.c
--- linux-2.6.16.46-0.12.orig/kernel/ve/vzdev.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ve/vzdev.c	2007-08-28 17:35:36.000000000 +0400
@@ -0,0 +1,156 @@
+/*
+ *  kernel/vzdev.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/vzctl.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/vzcalluser.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <linux/device.h>
+#include <linux/smp_lock.h>
+
+#define VZCTL_MAJOR 126
+#define VZCTL_NAME "vzctl"
+
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo Interface");
+MODULE_LICENSE("GPL v2");
+
+static LIST_HEAD(ioctls);
+static spinlock_t ioctl_lock = SPIN_LOCK_UNLOCKED;
+/* Find the handler for cmd's ioctl type and pin its owner module. */
+static struct vzioctlinfo *vzctl_get_handler(unsigned int cmd)
+{
+	struct vzioctlinfo *h;
+
+	spin_lock(&ioctl_lock);
+	list_for_each_entry(h, &ioctls, list) {
+		if (h->type == _IOC_TYPE(cmd))
+			goto found;
+	}
+	h = NULL;	/* list exhausted without a match */
+found:
+	if (h && !try_module_get(h->owner))
+		h = NULL;	/* owner reference not obtained: report no handler */
+	spin_unlock(&ioctl_lock);
+	return h;
+}
+
+static void vzctl_put_handler(struct vzioctlinfo *h)
+{
+	if (!h)
+		return;
+
+	module_put(h->owner);
+}
+/* Route an ioctl to its registered handler; -ENOTTY if there is none. */
+long vzctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct vzioctlinfo *h;
+	int err;
+
+	err = -ENOTTY;
+	h = vzctl_get_handler(cmd);
+	if (h && h->ioctl) {
+		err = (*h->ioctl)(file, cmd, arg);
+	}
+	vzctl_put_handler(h);	/* accepts NULL */
+
+	return err;
+}
+/* Compat variant of vzctl_ioctl(); -ENOIOCTLCMD without a compat handler. */
+long compat_vzctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct vzioctlinfo *h;
+	int err;
+
+	err = -ENOIOCTLCMD;
+	h = vzctl_get_handler(cmd);
+	if (h && h->compat_ioctl) {
+		err = (*h->compat_ioctl)(file, cmd, arg);
+	}
+	vzctl_put_handler(h);	/* accepts NULL */
+
+	return err;
+}
+
+void vzioctl_register(struct vzioctlinfo *inf)
+{
+	spin_lock(&ioctl_lock);
+	list_add(&inf->list, &ioctls);
+	spin_unlock(&ioctl_lock);
+}
+EXPORT_SYMBOL(vzioctl_register);
+
+void vzioctl_unregister(struct vzioctlinfo *inf)
+{
+	spin_lock(&ioctl_lock);
+	list_del_init(&inf->list);
+	spin_unlock(&ioctl_lock);
+}
+EXPORT_SYMBOL(vzioctl_unregister);
+
+/*
+ * Init/exit stuff.
+ */
+static struct file_operations vzctl_fops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= vzctl_ioctl,
+	.compat_ioctl	= compat_vzctl_ioctl,
+};
+
+static struct class *vzctl_class;
+
+static void __exit vzctl_exit(void)
+{
+	class_device_destroy(vzctl_class, MKDEV(VZCTL_MAJOR, 0));
+	class_destroy(vzctl_class);
+	unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME);
+}
+/* Register the vzctl char device, its class and its class device. */
+static int __init vzctl_init(void)
+{
+	int ret;
+	struct class_device *class_err;
+
+	ret = register_chrdev(VZCTL_MAJOR, VZCTL_NAME, &vzctl_fops);
+	if (ret < 0)
+		goto out;
+
+	vzctl_class = class_create(THIS_MODULE, "vzctl");
+	if (IS_ERR(vzctl_class)) {
+		ret = PTR_ERR(vzctl_class);
+		goto out_cleandev;
+	}
+
+	class_err = class_device_create(vzctl_class, NULL, MKDEV(VZCTL_MAJOR, 0),
+				NULL, VZCTL_NAME);
+	if (IS_ERR(class_err)) {
+		ret = PTR_ERR(class_err);
+		goto out_rmclass;
+	}
+	/* success: ret still holds register_chrdev()'s non-negative result */
+	goto out;
+
+out_rmclass:
+	class_destroy(vzctl_class);
+out_cleandev:
+	unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME);
+out:
+	return ret;
+}
+
+module_init(vzctl_init)
+module_exit(vzctl_exit);
diff -upr linux-2.6.16.46-0.12.orig/kernel/ve/vzevent.c linux-2.6.16.46-0.12-027test011/kernel/ve/vzevent.c
--- linux-2.6.16.46-0.12.orig/kernel/ve/vzevent.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ve/vzevent.c	2007-08-28 17:35:32.000000000 +0400
@@ -0,0 +1,145 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/netlink.h>
+#include <linux/errno.h>
+#include <linux/ve_proto.h>
+#include <linux/vzevent.h>
+
+#define NETLINK_UEVENT	15
+#define VZ_EVGRP_ALL	0x01
+
+#define KOBJECT_UEVENT	(defined(CONFIG_HOTPLUG) && defined(CONFIG_NET))
+
+#if KOBJECT_UEVENT
+#ifdef NETLINK_KOBJECT_UEVENT
+#if NETLINK_KOBJECT_UEVENT != 15
+#error "netlink conflict?"
+#endif
+#endif
+/* Emit event through kobject_uevent() on a temporary kobject named msg. */
+static int do_vzevent_send(int event, char *msg, int len)
+{
+	struct kobject ktmp;
+
+	memset(&ktmp, 0, sizeof(ktmp));
+	ktmp.parent = NULL;
+	ktmp.k_name = msg;	/* len is not used by this variant */
+	kobject_uevent(&ktmp, event);
+	return 0;
+}
+#else /* KOBJECT_UEVENT */
+static struct sock *vzev_sock;
+
+static char *action_to_string(int action)
+{
+	switch (action) {
+	case KOBJ_MOUNT:
+		return "mount";
+	case KOBJ_UMOUNT:
+		return "umount";
+	case KOBJ_START:
+		return "start";
+	case KOBJ_STOP:
+		return "stop";
+	default:
+		return NULL;
+	}
+}
+/* Broadcast "<action>@<msg>" (no NUL) on vzev_sock to group VZ_EVGRP_ALL. */
+static int do_vzevent_send(int event, char *msg, int len)
+{
+	struct sk_buff *skb;
+	char *buf, *action;
+	int alen;
+
+	action = action_to_string(event);
+	if (action == NULL)	/* unknown event: strlen(NULL) would oops */
+		return -EINVAL;
+	alen = strlen(action);
+	skb = alloc_skb(len + 1 + alen, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+	buf = skb_put(skb, len + 1 + alen);
+	memcpy(buf, action, alen);
+	buf[alen] = '@';
+	memcpy(buf + alen + 1, msg, len);
+	(void)netlink_broadcast(vzev_sock, skb, 0, VZ_EVGRP_ALL, GFP_KERNEL);
+	return 0;
+}
+#endif
+/* Format the attributes into one page and send the event from VE0. */
+int vzevent_send(int event, const char *attrs_fmt, ...)
+{
+	va_list args;
+	int len, err;
+	struct ve_struct *ve;
+	char *page;
+
+	err = -ENOMEM;
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (!page)
+		goto out;
+	/* output longer than PAGE_SIZE is truncated by vscnprintf() */
+	va_start(args, attrs_fmt);
+	len = vscnprintf(page, PAGE_SIZE, attrs_fmt, args);
+	va_end(args);
+	/* send with VE0 as the exec env, then restore the caller's env */
+	ve = set_exec_env(get_ve0());
+	err = do_vzevent_send(event, page, len);
+	(void)set_exec_env(ve);
+	free_page((unsigned long)page);
+out:
+	return err;
+}
+EXPORT_SYMBOL(vzevent_send);
+
+static int ve_start(void *data)
+{
+	struct ve_struct *ve;
+
+	ve = (struct ve_struct *)data;
+	vzevent_send(KOBJ_START, "%d", ve->veid);
+	return 0;
+}
+
+static void ve_stop(void *data)
+{
+	struct ve_struct *ve;
+
+	ve = (struct ve_struct *)data;
+	vzevent_send(KOBJ_STOP, "%d", ve->veid);
+}
+
+static struct ve_hook ve_start_stop_hook = {
+	.init		= ve_start,
+	.fini		= ve_stop,
+	.owner		= THIS_MODULE,
+	.priority	= HOOK_PRIO_AFTERALL,
+};
+/* Create the netlink socket if it is needed, then hook VE start/stop. */
+static int __init init_vzevent(void)
+{
+#if !KOBJECT_UEVENT
+	vzev_sock = netlink_kernel_create(NETLINK_UEVENT, 0, NULL, THIS_MODULE);
+	if (vzev_sock == NULL)
+		return -ENOMEM;
+#endif
+	ve_hook_register(VE_SS_CHAIN, &ve_start_stop_hook);
+	return 0;
+}
+
+static void __exit exit_vzevent(void)
+{
+	ve_hook_unregister(&ve_start_stop_hook);
+#if !KOBJECT_UEVENT
+	sock_release(vzev_sock->sk_socket);
+#endif
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_vzevent);
+module_exit(exit_vzevent);
diff -upr linux-2.6.16.46-0.12.orig/kernel/ve/vzwdog.c linux-2.6.16.46-0.12-027test011/kernel/ve/vzwdog.c
--- linux-2.6.16.46-0.12.orig/kernel/ve/vzwdog.c	2007-08-28 17:35:39.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/kernel/ve/vzwdog.c	2007-08-28 17:35:32.000000000 +0400
@@ -0,0 +1,288 @@
+/*
+ *  kernel/vzwdog.c
+ *
+ *  Copyright (C) 2000-2005  SWsoft
+ *  All rights reserved.
+ *
+ *  Licensing governed by "linux/COPYING.SWsoft" file.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/kobject.h>
+#include <linux/genhd.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/suspend.h>
+#include <linux/ve.h>
+#include <linux/vzstat.h>
+#include <asm/uaccess.h>
+#include <linux/kthread.h>
+
+/* Stuff regarding the kernel thread polling VE validity */
+static int sleep_timeout = 60;
+static struct task_struct *wdog_thread_tsk;
+
+extern void show_mem(void);
+
+static struct file *intr_file;
+static char page[PAGE_SIZE];
+/* Print page[0..len) by line, dropping "x:" lines whose counts are all 0. */
+static void parse_irq_list(int len)
+{
+	int i, k, skip;
+	for (i = 0; i < len; ) {
+		k = i;	/* start of the current line */
+		while (i < len && page[i] != '\n' && page[i] != ':')
+			i++;
+		skip = 0;
+		if (i < len && page[i] != '\n') {
+			i++; /* skip ':' */
+			while (i < len && (page[i] == ' ' || page[i] == '0'))
+				i++;
+			skip = (i < len && (page[i] < '0' || page[i] > '9'));
+			while (i < len && page[i] != '\n')
+				i++;
+		}
+		if (!skip)
+			printk("%.*s\n", i - k, page + k);
+		if (i < len)
+			i++; /* skip '\n' */
+	}
+}
+
+extern loff_t vfs_llseek(struct file *file, loff_t, int);
+extern ssize_t vfs_read(struct file *file, char __user *, size_t, loff_t *);
+extern struct file *filp_open(const char *filename, int flags, int mode);
+extern int filp_close(struct file *filp, fl_owner_t id);
+static void show_irq_list(void)
+{
+	mm_segment_t fs;
+	int r;
+	/* Re-read intr_file from offset 0 into page[] and print the result. */
+	fs = get_fs();
+	set_fs(KERNEL_DS);	/* page[] is a kernel buffer passed as __user */
+	vfs_llseek(intr_file, 0, 0);
+	r = vfs_read(intr_file, (void __user *)page, sizeof(page),
+			&intr_file->f_pos);
+	set_fs(fs);
+
+	if (r > 0)
+		parse_irq_list(r);
+}
+
+static void show_alloc_latency(void)
+{
+	static const char *alloc_descr[KSTAT_ALLOCSTAT_NR] = {
+		"A0",
+		"L0",
+		"H0",
+		"L1",
+		"H1"
+	};
+	int i;
+
+	printk("lat: ");
+	for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
+		struct kstat_lat_struct *p;
+		cycles_t maxlat, avg0, avg1, avg2;
+
+		p = &kstat_glob.alloc_lat[i];
+		spin_lock_irq(&kstat_glb_lock);
+		maxlat = p->last.maxlat;
+		avg0 = p->avg[0];
+		avg1 = p->avg[1];
+		avg2 = p->avg[2];
+		spin_unlock_irq(&kstat_glb_lock);
+
+		printk("%s %Lu (%Lu %Lu %Lu)",
+				alloc_descr[i],
+				(unsigned long long)maxlat,
+				(unsigned long long)avg0,
+				(unsigned long long)avg1,
+				(unsigned long long)avg2);
+	}
+	printk("\n");
+}
+
+static void show_schedule_latency(void)
+{
+	struct kstat_lat_pcpu_struct *p;
+	cycles_t maxlat, totlat, avg0, avg1, avg2;
+	unsigned long count;
+
+	p = &kstat_glob.sched_lat;
+	spin_lock_irq(&kstat_glb_lock);
+	maxlat = p->last.maxlat;
+	totlat = p->last.totlat;
+	count = p->last.count;
+	avg0 = p->avg[0];
+	avg1 = p->avg[1];
+	avg2 = p->avg[2];
+	spin_unlock_irq(&kstat_glb_lock);
+
+	printk("sched lat: %Lu/%Lu/%lu (%Lu %Lu %Lu)\n",
+			(unsigned long long)maxlat,
+			(unsigned long long)totlat,
+			count,
+			(unsigned long long)avg0,
+			(unsigned long long)avg1,
+			(unsigned long long)avg2);
+}
+/* Print the report banner: wall time, jiffies count, CPU number and HZ. */
+static void show_header(void)
+{
+	struct timeval tv;
+
+	do_gettimeofday(&tv);
+	preempt_disable();	/* presumably for smp_processor_id() below */
+	printk("*** VZWDOG 1.14: time %lu.%06lu uptime %Lu CPU %d ***\n",
+			tv.tv_sec, (long)tv.tv_usec,
+			(unsigned long long)get_jiffies_64(),
+			smp_processor_id());
+#ifdef CONFIG_FAIRSCHED
+	printk("*** cycles_per_jiffy %lu jiffies_per_second %u ***\n",
+			cycles_per_jiffy, HZ);
+#else
+	printk("*** jiffies_per_second %u ***\n", HZ);
+#endif
+	preempt_enable();
+}
+
+static void show_pgdatinfo(void)
+{
+	pg_data_t *pgdat;
+
+	printk("pgdat:");
+	for_each_pgdat(pgdat) {
+		printk(" %d: %lu,%lu,%lu"
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+",%p"
+#endif
+			,
+			pgdat->node_id,
+			pgdat->node_start_pfn,
+			pgdat->node_present_pages,
+			pgdat->node_spanned_pages
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+			,
+			pgdat->node_mem_map
+#endif
+		);
+	}
+	printk("\n");
+}
+/* Print read/write counters of each disk in block_subsys, minus loop/ram. */
+static void show_diskio(void)
+{
+	struct gendisk *gd;
+	char buf[BDEVNAME_SIZE];
+
+	printk("disk_io: ");
+	/* names of the form loop<digit>... and ram<digit>... are left out */
+	down_read(&block_subsys.rwsem);
+	list_for_each_entry(gd, &block_subsys.kset.list, kobj.entry) {
+		char *name;
+		name = disk_name(gd, 0, buf);
+		if ((strlen(name) > 4) && (strncmp(name, "loop", 4) == 0) &&
+		    isdigit(name[4]))
+			continue;
+		if ((strlen(name) > 3) && (strncmp(name, "ram", 3) == 0) &&
+		    isdigit(name[3]))
+			continue;
+		printk("(%u,%u) %s r(%u %u %u) w(%u %u %u)\n",
+			gd->major, gd->first_minor,
+			name,
+			disk_stat_read(gd, ios[READ]),
+			disk_stat_read(gd, sectors[READ]),
+			disk_stat_read(gd, merges[READ]),
+			disk_stat_read(gd, ios[WRITE]),
+			disk_stat_read(gd, sectors[WRITE]),
+			disk_stat_read(gd, merges[WRITE]));
+	}
+	up_read(&block_subsys.rwsem);
+
+	printk("\n");
+}
+
+static void show_nrprocs(void)
+{
+	unsigned long _nr_running, _nr_sleeping,
+			_nr_unint, _nr_zombie, _nr_dead, _nr_stopped;
+
+	_nr_running = nr_running();
+	_nr_unint = nr_uninterruptible();
+	_nr_sleeping = nr_sleeping();
+	_nr_zombie = nr_zombie;
+	_nr_dead = atomic_read(&nr_dead);
+	_nr_stopped = nr_stopped();
+
+	printk("VEnum: %d, proc R %lu, S %lu, D %lu, "
+		"Z %lu, X %lu, T %lu (tot %d)\n",
+		nr_ve,	_nr_running, _nr_sleeping, _nr_unint,
+		_nr_zombie, _nr_dead, _nr_stopped, nr_threads);
+}
+
+static void wdog_print(void)
+{
+	show_header();
+	show_irq_list();
+	show_pgdatinfo();
+	show_mem();
+	show_diskio();
+	show_schedule_latency();
+	show_alloc_latency();
+	show_nrprocs();
+}
+/* Watchdog thread: print a report, sleep sleep_timeout seconds, repeat. */
+static int wdog_loop(void* data)
+{
+	while (1) {
+		wdog_print();
+		try_to_freeze();
+		set_current_state(TASK_UNINTERRUPTIBLE);	/* before stop check */
+		if (kthread_should_stop())
+			break;
+		schedule_timeout(sleep_timeout*HZ);
+	}
+	__set_current_state(TASK_RUNNING);	/* don't exit marked as sleeping */
+	return 0;
+}
+/* Open /proc/interrupts for later re-reading and start the vzwdog thread. */
+static int __init wdog_init(void)
+{
+	struct file *file;
+
+	file = filp_open("/proc/interrupts", 0, 0);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+	intr_file = file;
+
+	wdog_thread_tsk = kthread_run(wdog_loop, NULL, "vzwdog");
+	if (IS_ERR(wdog_thread_tsk)) {
+		filp_close(intr_file, NULL);
+		return -EBUSY;	/* the kthread_run() error code is not passed on */
+	}
+	return 0;
+}
+
+static void __exit wdog_exit(void)
+{
+	kthread_stop(wdog_thread_tsk);
+	filp_close(intr_file, NULL);
+}
+
+module_param(sleep_timeout, int, 0644);	/* seconds; writable by root only */
+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
+MODULE_DESCRIPTION("Virtuozzo WDOG");
+MODULE_LICENSE("GPL v2");
+
+module_init(wdog_init)
+module_exit(wdog_exit)
diff -upr linux-2.6.16.46-0.12.orig/lib/Kconfig.debug linux-2.6.16.46-0.12-027test011/lib/Kconfig.debug
--- linux-2.6.16.46-0.12.orig/lib/Kconfig.debug	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/lib/Kconfig.debug	2007-08-28 17:35:34.000000000 +0400
@@ -23,6 +23,14 @@ config MAGIC_SYSRQ
 	  keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
 	  unless you really know what this hack does.
 
+config SYSRQ_DEBUG
+	bool "Debugging via sysrq keys"
+	depends on MAGIC_SYSRQ
+	help
+	  Say Y if you want to extend the functionality of the magic SysRq
+	  key. It provides additional debugging facilities such as dumping
+	  and writing memory, resolving symbols, and others.
+
 config DEBUG_KERNEL
 	bool "Kernel debugging"
 	help
@@ -48,7 +56,7 @@ config LOG_BUF_SHIFT
 
 config DETECT_SOFTLOCKUP
 	bool "Detect Soft Lockups"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && !SCHED_VCPU
 	default y
 	help
 	  Say Y here to enable the kernel to detect "soft lockups",
diff -upr linux-2.6.16.46-0.12.orig/lib/bust_spinlocks.c linux-2.6.16.46-0.12-027test011/lib/bust_spinlocks.c
--- linux-2.6.16.46-0.12.orig/lib/bust_spinlocks.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/lib/bust_spinlocks.c	2007-08-28 17:35:30.000000000 +0400
@@ -13,26 +13,21 @@
 #include <linux/tty.h>
 #include <linux/wait.h>
 #include <linux/vt_kern.h>
-
+#include <linux/console.h>
 
 void bust_spinlocks(int yes)
 {
+	if (printk_no_wake)
+		return;
+
 	if (yes) {
 		oops_in_progress = 1;
 	} else {
-		int loglevel_save = console_loglevel;
 #ifdef CONFIG_VT
 		unblank_screen();
 #endif
 		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk() will give klogd
-		 * and the blanked console a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;		/* NMI oopser may have shut the console up */
-		printk(" ");
-		console_loglevel = loglevel_save;
+		wake_up_klogd();
 	}
 }
 
diff -upr linux-2.6.16.46-0.12.orig/lib/kobject_uevent.c linux-2.6.16.46-0.12-027test011/lib/kobject_uevent.c
--- linux-2.6.16.46-0.12.orig/lib/kobject_uevent.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/lib/kobject_uevent.c	2007-08-28 17:35:31.000000000 +0400
@@ -46,6 +46,10 @@ static char *action_to_string(enum kobje
 		return "offline";
 	case KOBJ_ONLINE:
 		return "online";
+	case KOBJ_START:
+		return "start";
+	case KOBJ_STOP:
+		return "stop";
 	default:
 		return NULL;
 	}
diff -upr linux-2.6.16.46-0.12.orig/lib/smp_processor_id.c linux-2.6.16.46-0.12-027test011/lib/smp_processor_id.c
--- linux-2.6.16.46-0.12.orig/lib/smp_processor_id.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/lib/smp_processor_id.c	2007-08-28 17:35:34.000000000 +0400
@@ -7,11 +7,26 @@
 #include <linux/kallsyms.h>
 #include <linux/sched.h>
 
+#ifdef CONFIG_VCPU
+/*
+ * We cannot guarantee pcpu affinity when the VCPU extension is in use.
+ * NOTE(review): lib/Kconfig.debug tests SCHED_VCPU; confirm CONFIG_VCPU.
+ */
+static inline int run_on_single_cpu(int cpu) { return 0; }
+#else
+/* Returns 1 when current's cpus_allowed is exactly the mask of @cpu. */
+static inline int run_on_single_cpu(int cpu)
+{
+	cpumask_t only_cpu = cpumask_of_cpu(cpu);
+
+	return cpus_equal(current->cpus_allowed, only_cpu) ? 1 : 0;
+}
+#endif
+
 unsigned int debug_smp_processor_id(void)
 {
 	unsigned long preempt_count = preempt_count();
 	int this_cpu = raw_smp_processor_id();
-	cpumask_t this_mask;
 
 	if (likely(preempt_count))
 		goto out;
@@ -23,9 +38,7 @@ unsigned int debug_smp_processor_id(void
 	 * Kernel threads bound to a single CPU can safely use
 	 * smp_processor_id():
 	 */
-	this_mask = cpumask_of_cpu(this_cpu);
-
-	if (cpus_equal(current->cpus_allowed, this_mask))
+	if (run_on_single_cpu(this_cpu))
 		goto out;
 
 	/*
diff -upr linux-2.6.16.46-0.12.orig/mm/filemap.c linux-2.6.16.46-0.12-027test011/mm/filemap.c
--- linux-2.6.16.46-0.12.orig/mm/filemap.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/filemap.c	2007-08-28 17:35:30.000000000 +0400
@@ -39,6 +39,8 @@
 #include <asm/uaccess.h>
 #include <asm/mman.h>
 
+#include <ub/io_acct.h>
+
 static ssize_t
 generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	loff_t offset, unsigned long nr_segs);
@@ -117,12 +119,14 @@ void inline __remove_from_page_cache_noc
 
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
+	ub_io_release_debug(page);
 	mapping->nrpages--;
 	pagecache_acct(-1);
 }
 
 void __remove_from_page_cache(struct page *page)
 {
+#ifdef CONFIG_PAGE_STATES
 	/*
 	 * Check if the discard fault handler already removed
 	 * the page from the page cache. If not set the discard
@@ -131,9 +135,9 @@ void __remove_from_page_cache(struct pag
 	 */
 	if (page_host_discards() && TestSetPageDiscarded(page))
 		return;
-
+#endif
 	__remove_from_page_cache_nocheck(page);
-
+#ifdef CONFIG_PAGE_STATES
 	/*
 	 * Check the hardware page state and clear the discard
 	 * bit in the page flags only if the page is not
@@ -141,6 +145,7 @@ void __remove_from_page_cache(struct pag
 	 */
 	if (page_host_discards() && !page_discarded(page))
 		ClearPageDiscarded(page);
+#endif
 }
 
 void remove_from_page_cache(struct page *page)
diff -upr linux-2.6.16.46-0.12.orig/mm/filemap_xip.c linux-2.6.16.46-0.12-027test011/mm/filemap_xip.c
--- linux-2.6.16.46-0.12.orig/mm/filemap_xip.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/mm/filemap_xip.c	2007-08-28 17:35:30.000000000 +0400
@@ -15,6 +15,7 @@
 #include <linux/rmap.h>
 #include <asm/tlbflush.h>
 #include "filemap.h"
+#include <ub/ub_vmpages.h>
 
 /*
  * This is a file read routine for execute in place files, and uses
@@ -190,6 +191,8 @@ __xip_unmap (struct address_space * mapp
 			flush_cache_page(vma, address, pte_pfn(*pte));
 			pteval = ptep_clear_flush(vma, address, pte);
 			page_remove_rmap(page);
+			pb_remove_ref(page, mm);
+			ub_unused_privvm_inc(mm, vma);
 			dec_mm_counter(mm, file_rss);
 			BUG_ON(pte_dirty(pteval));
 			pte_unmap_unlock(pte, ptl);
diff -upr linux-2.6.16.46-0.12.orig/mm/fremap.c linux-2.6.16.46-0.12-027test011/mm/fremap.c
--- linux-2.6.16.46-0.12.orig/mm/fremap.c	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/fremap.c	2007-08-28 17:35:33.000000000 +0400
@@ -20,6 +20,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, pte_t *ptep)
 {
@@ -34,6 +36,7 @@ static int zap_pte(struct mm_struct *mm,
 			if (pte_dirty(pte))
 				set_page_dirty(page);
 			page_remove_rmap(page);
+			pb_remove_ref(page, mm);
 			page_cache_release(page);
 		}
 	} else {
@@ -57,6 +60,10 @@ int install_page(struct mm_struct *mm, s
 	pte_t *pte;
 	pte_t pte_val;
 	spinlock_t *ptl;
+	struct page_beancounter *pbc;
+
+	if (unlikely(pb_alloc(&pbc)))
+		goto out_nopb;
 
 	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
@@ -75,13 +82,16 @@ int install_page(struct mm_struct *mm, s
 	if (page_mapcount(page) > INT_MAX/2)
 		goto unlock;
 
-	if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
+	if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte)) {
+		ub_unused_privvm_dec(mm, vma);
 		inc_mm_counter(mm, file_rss);
+	}
 
 	flush_icache_page(vma, page);
 	pte_val = mk_pte(page, prot);
 	page_check_writable(page, pte_val, 2);
 	set_pte_at(mm, addr, pte, pte_val);
+	pb_add_ref(page, mm, &pbc);
 	page_add_file_rmap(page);
 	page_make_volatile(page, 1);
 	pte_val = *pte;
@@ -90,6 +100,8 @@ int install_page(struct mm_struct *mm, s
 unlock:
 	pte_unmap_unlock(pte, ptl);
 out:
+	pb_free(&pbc);
+out_nopb:
 	return err;
 }
 EXPORT_SYMBOL(install_page);
@@ -112,6 +124,7 @@ int install_file_pte(struct mm_struct *m
 
 	if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) {
 		update_hiwater_rss(mm);
+		ub_unused_privvm_inc(mm, vma);
 		dec_mm_counter(mm, file_rss);
 	}
 
@@ -223,4 +236,5 @@ asmlinkage long sys_remap_file_pages(uns
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(sys_remap_file_pages);
 
diff -upr linux-2.6.16.46-0.12.orig/mm/memory.c linux-2.6.16.46-0.12-027test011/mm/memory.c
--- linux-2.6.16.46-0.12.orig/mm/memory.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/memory.c	2007-08-28 17:35:34.000000000 +0400
@@ -42,6 +42,7 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/mman.h>
+#include <linux/virtinfo.h>
 #include <linux/swap.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
@@ -59,6 +60,10 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 
+#include <ub/beancounter.h>
+#include <ub/ub_vmpages.h>
+#include <ub/ub_mem.h>
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -104,18 +109,21 @@ void pgd_clear_bad(pgd_t *pgd)
 	pgd_ERROR(*pgd);
 	pgd_clear(pgd);
 }
+EXPORT_SYMBOL_GPL(pgd_clear_bad);
 
 void pud_clear_bad(pud_t *pud)
 {
 	pud_ERROR(*pud);
 	pud_clear(pud);
 }
+EXPORT_SYMBOL_GPL(pud_clear_bad);
 
 void pmd_clear_bad(pmd_t *pmd)
 {
 	pmd_ERROR(*pmd);
 	pmd_clear(pmd);
 }
+EXPORT_SYMBOL_GPL(pmd_clear_bad);
 
 /*
  * Note: this doesn't free the actual pages themselves. That
@@ -319,6 +327,7 @@ int __pte_alloc(struct mm_struct *mm, pm
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__pte_alloc);
 
 int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 {
@@ -422,6 +431,7 @@ struct page *vm_normal_page(struct vm_ar
 	 */
 	return pfn_to_page(pfn);
 }
+EXPORT_SYMBOL_GPL(vm_normal_page);
 
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
@@ -432,7 +442,7 @@ struct page *vm_normal_page(struct vm_ar
 static inline void
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-		unsigned long addr, int *rss)
+		unsigned long addr, int *rss, struct page_beancounter **pbc)
 {
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
@@ -473,10 +483,13 @@ copy_one_pte(struct mm_struct *dst_mm, s
 
 	page = vm_normal_page(vma, addr, pte);
 	if (page) {
+#ifdef CONFIG_PAGE_STATES
 		if (page_host_discards() && unlikely(PageDiscarded(page)))
 			goto out_discard_pte;
+#endif
 		get_page(page);
 		page_dup_rmap(page);
+		pb_dup_ref(page, dst_mm, pbc);
 		rss[!!PageAnon(page)]++;
 	}
 
@@ -484,6 +497,7 @@ out_set_pte:
 	set_pte_at(dst_mm, addr, dst_pte, pte);
 	return;
 
+#ifdef CONFIG_PAGE_STATES
 out_discard_pte:
 	/*
 	 * If the page referred by the pte has the PG_discarded bit set,
@@ -508,22 +522,39 @@ out_discard_pte:
 		set_pte_at(dst_mm, addr, dst_pte, pgoff_to_pte(page->index));
 	else
 		pte_clear(dst_mm, addr, dst_pte);
+#endif
 }
 
+/* pte_ptrs: ptes from a to the end of its pte page; same_ub: same mm_ub */
+#define pte_ptrs(a)	(PTRS_PER_PTE - (((a) >> PAGE_SHIFT)&(PTRS_PER_PTE - 1)))
+#ifdef CONFIG_USER_RESOURCE
+#define same_ub(mm1, mm2)	((mm1)->mm_ub == (mm2)->mm_ub)
+#else
+#define same_ub(mm1, mm2)	(1)
+#endif
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
+		pmd_t *dst_pmd, pmd_t *src_pmd,
+		struct vm_area_struct *dst_vma,
+		struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end)
 {
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
 	int progress = 0;
-	int rss[2];
+	int rss[2], rss_tot;
+	struct page_beancounter *pbc;
+	int err;
 
+	err = -ENOMEM;
+	pbc = same_ub(src_mm, dst_mm) ? PBC_COPY_SAME : NULL;
 again:
+	if (pbc != PBC_COPY_SAME && pb_alloc_list(&pbc, pte_ptrs(addr)))
+		goto out;
 	rss[1] = rss[0] = 0;
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
 	if (!dst_pte)
-		return -ENOMEM;
+		goto out;
+
 	src_pte = pte_offset_map_nested(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock(src_ptl);
@@ -544,22 +575,31 @@ again:
 			progress++;
 			continue;
 		}
-		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
+				vma, addr, rss, &pbc);
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
 	spin_unlock(src_ptl);
 	pte_unmap_nested(src_pte - 1);
+	rss_tot = rss[0] + rss[1];
+	ub_unused_privvm_sub(dst_mm, dst_vma, rss_tot);
 	add_mm_rss(dst_mm, rss[0], rss[1]);
 	pte_unmap_unlock(dst_pte - 1, dst_ptl);
 	cond_resched();
 	if (addr != end)
 		goto again;
-	return 0;
+
+	err = 0;
+out:
+	pb_free_list(&pbc);
+	return err;
 }
 
 static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
+		pud_t *dst_pud, pud_t *src_pud,
+		struct vm_area_struct *dst_vma,
+		struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end)
 {
 	pmd_t *src_pmd, *dst_pmd;
@@ -574,14 +614,16 @@ static inline int copy_pmd_range(struct 
 		if (pmd_none_or_clear_bad(src_pmd))
 			continue;
 		if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
-						vma, addr, next))
+						dst_vma, vma, addr, next))
 			return -ENOMEM;
 	} while (dst_pmd++, src_pmd++, addr = next, addr != end);
 	return 0;
 }
 
 static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+		pgd_t *dst_pgd, pgd_t *src_pgd,
+		struct vm_area_struct *dst_vma,
+		struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end)
 {
 	pud_t *src_pud, *dst_pud;
@@ -596,19 +638,20 @@ static inline int copy_pud_range(struct 
 		if (pud_none_or_clear_bad(src_pud))
 			continue;
 		if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
-						vma, addr, next))
+						dst_vma, vma, addr, next))
 			return -ENOMEM;
 	} while (dst_pud++, src_pud++, addr = next, addr != end);
 	return 0;
 }
 
-int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		struct vm_area_struct *vma)
+int __copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *vma,
+		      unsigned long addr, size_t size)
 {
+	struct mm_struct *dst_mm = dst_vma->vm_mm;
+	struct mm_struct *src_mm = vma->vm_mm;
 	pgd_t *src_pgd, *dst_pgd;
 	unsigned long next;
-	unsigned long addr = vma->vm_start;
-	unsigned long end = vma->vm_end;
+	unsigned long end = addr + size;
 
 	/*
 	 * Don't copy ptes where a page fault will fill them correctly.
@@ -631,11 +674,22 @@ int copy_page_range(struct mm_struct *ds
 		if (pgd_none_or_clear_bad(src_pgd))
 			continue;
 		if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
-						vma, addr, next))
+						dst_vma, vma, addr, next))
 			return -ENOMEM;
 	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__copy_page_range);
+
+/* Run __copy_page_range() over all of @vma; each mm must own its vma. */
+int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
+		    struct vm_area_struct *dst_vma, struct vm_area_struct *vma)
+{
+	BUG_ON(dst_vma->vm_mm != dst);
+	BUG_ON(vma->vm_mm != src);
+	return __copy_page_range(dst_vma, vma, vma->vm_start,
+				 vma->vm_end - vma->vm_start);
+}
 
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
@@ -647,6 +701,7 @@ static unsigned long zap_pte_range(struc
 	spinlock_t *ptl;
 	int file_rss = 0;
 	int anon_rss = 0;
+	int rss;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	do {
@@ -700,6 +755,7 @@ static unsigned long zap_pte_range(struc
 				file_rss--;
 			}
 			page_remove_rmap(page);
+			pb_remove_ref(page, mm);
 			tlb_remove_page(tlb, page);
 			continue;
 		}
@@ -714,6 +770,8 @@ static unsigned long zap_pte_range(struc
 		pte_clear_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
+	rss = -(file_rss + anon_rss);
+	ub_unused_privvm_add(mm, vma, rss);
 	add_mm_rss(mm, file_rss, anon_rss);
 	pte_unmap_unlock(pte - 1, ptl);
 
@@ -1241,6 +1299,8 @@ int zeromap_page_range(struct vm_area_st
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
+
+	ub_unused_privvm_sub(mm, vma, size >> PAGE_SHIFT);
 	return err;
 }
 
@@ -1626,6 +1686,7 @@ static int do_wp_page(struct mm_struct *
 	struct page *old_page, *new_page;
 	pte_t entry;
 	int ret = VM_FAULT_MINOR;
+	struct page_beancounter *pbc;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
@@ -1638,6 +1699,7 @@ static int do_wp_page(struct mm_struct *
 			flush_cache_page(vma, address, pte_pfn(orig_pte));
 			entry = pte_mkyoung(orig_pte);
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+			ClearPageCheckpointed(old_page);
 			page_check_writable(old_page, entry, 1);
 			ptep_set_access_flags(vma, address, page_table, entry, 1);
 			update_mmu_cache(vma, address, entry);
@@ -1660,6 +1722,9 @@ static int do_wp_page(struct mm_struct *
 gotten:
 	pte_unmap_unlock(page_table, ptl);
 
+	if (unlikely(pb_alloc(&pbc)))
+		goto oom_nopb;
+
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
 	if (old_page == ZERO_PAGE(address)) {
@@ -1680,12 +1745,15 @@ gotten:
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
 			page_remove_rmap(old_page);
+			pb_remove_ref(old_page, mm);
 			if (!PageAnon(old_page)) {
 				dec_mm_counter(mm, file_rss);
 				inc_mm_counter(mm, anon_rss);
 			}
-		} else
+		} else {
+			ub_unused_privvm_dec(mm, vma);
 			inc_mm_counter(mm, anon_rss);
+		}
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -1695,6 +1763,7 @@ gotten:
 		update_mmu_cache(vma, address, entry);
 		lru_cache_add_active(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
+		pb_add_ref(new_page, mm, &pbc);
 
 		/* Free the old page.. */
 		new_page = old_page;
@@ -1704,10 +1773,13 @@ gotten:
 		page_cache_release(new_page);
 	if (old_page)
 		page_cache_release(old_page);
+	pb_free(&pbc);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return ret;
 oom:
+	pb_free(&pbc);
+oom_nopb:
 	if (old_page)
 		page_cache_release(old_page);
 	return VM_FAULT_OOM;
@@ -2066,10 +2138,16 @@ static int do_swap_page(struct mm_struct
 	swp_entry_t entry;
 	pte_t pte;
 	int ret = VM_FAULT_MINOR;
+	struct page_beancounter *pbc;
+	cycles_t start;
 
 	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
-		goto out;
+		goto out_nostat;
+
+	if (unlikely(pb_alloc(&pbc)))
+		return VM_FAULT_OOM;
 
+	start = get_cycles();
 	entry = pte_to_swp_entry(orig_pte);
 again:
  	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
@@ -2109,8 +2187,11 @@ again:
 	 * Back out if somebody else already faulted in this pte.
 	 */
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (unlikely(!pte_same(*page_table, orig_pte) ||
-		     (page_host_discards() && PageDiscarded(page))))
+	if (unlikely(!pte_same(*page_table, orig_pte)
+#ifdef CONFIG_PAGE_STATES
+				|| (page_host_discards() && PageDiscarded(page))
+#endif
+				))
 		goto out_nomap;
 
 	if (unlikely(!PageUptodate(page))) {
@@ -2121,6 +2202,7 @@ again:
 	/* The page isn't present yet, go ahead with the fault. */
 
 	inc_mm_counter(mm, anon_rss);
+	ub_percpu_inc(mm->mm_ub, swapin);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if (write_access && can_share_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -2131,10 +2213,11 @@ again:
 	page_check_writable(page, pte, 2);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
+	pb_add_ref(page, mm, &pbc);
+	ub_unused_privvm_dec(mm, vma);
 
 	swap_free(entry);
-	if (vm_swap_full())
-		remove_exclusive_swap_page(page);
+	try_to_remove_exclusive_swap_page(page);
 	unlock_page(page);
 
 	if (write_access) {
@@ -2149,7 +2232,7 @@ again:
 			ret = VM_FAULT_MINOR;
 		else if (rc == VM_FAULT_OOM)
 			ret = VM_FAULT_OOM;
-		goto out;
+		goto out_wp;
 	}
 
 	/* No need to invalidate - it was non-present before */
@@ -2157,10 +2240,16 @@ again:
 	lazy_mmu_prot_update(pte);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
-out:
+out_wp:
+	pb_free(&pbc);
+	spin_lock_irq(&kstat_glb_lock);
+	KSTAT_LAT_ADD(&kstat_glob.swap_in, get_cycles() - start);
+	spin_unlock_irq(&kstat_glb_lock);
+out_nostat:
 	return ret;
 out_nomap:
 	pte_unmap_unlock(page_table, ptl);
+	pb_free(&pbc);
 	unlock_page(page);
 	page_cache_release(page);
 	return ret;
@@ -2178,11 +2267,15 @@ static int do_anonymous_page(struct mm_s
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t entry;
+	struct page_beancounter *pbc;
 
 	if (write_access) {
 		/* Allocate our own private page. */
 		pte_unmap(page_table);
 
+		if (unlikely(pb_alloc(&pbc)))
+			goto oom_nopb;
+
 		if (unlikely(anon_vma_prepare(vma)))
 			goto oom;
 		page = alloc_zeroed_user_highpage(vma, address);
@@ -2199,7 +2292,10 @@ static int do_anonymous_page(struct mm_s
 		inc_mm_counter(mm, anon_rss);
 		lru_cache_add_active(page);
 		page_add_new_anon_rmap(page, vma, address);
+		pb_add_ref(page, mm, &pbc);
 	} else {
+		pbc = NULL;
+
 		/* Map the ZERO_PAGE - vm_page_prot is readonly */
 		page = ZERO_PAGE(address);
 		page_cache_get(page);
@@ -2213,18 +2309,22 @@ static int do_anonymous_page(struct mm_s
 		page_add_file_rmap(page);
 	}
 
+	ub_unused_privvm_dec(mm, vma);
 	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, entry);
 	lazy_mmu_prot_update(entry);
 unlock:
+	pb_free(&pbc);
 	pte_unmap_unlock(page_table, ptl);
 	return VM_FAULT_MINOR;
 release:
 	page_cache_release(page);
 	goto unlock;
 oom:
+	pb_free(&pbc);
+oom_nopb:
 	return VM_FAULT_OOM;
 }
 
@@ -2252,6 +2352,7 @@ static int do_no_page(struct mm_struct *
 	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
+	struct page_beancounter *pbc;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2261,6 +2362,9 @@ static int do_no_page(struct mm_struct *
 		sequence = mapping->truncate_count;
 		smp_rmb(); /* serializes i_size against truncate_count */
 	}
+
+	if (unlikely(pb_alloc(&pbc)))
+		goto oom_nopb;
 retry:
 	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
 	/*
@@ -2273,9 +2377,9 @@ retry:
 
 	/* no page was available -- either SIGBUS or OOM */
 	if (new_page == NOPAGE_SIGBUS)
-		return VM_FAULT_SIGBUS;
+		goto bus_nopg;
 	if (new_page == NOPAGE_OOM)
-		return VM_FAULT_OOM;
+		goto oom_nopg;
 
 	/*
 	 * Should we do an early C-O-W break?
@@ -2337,8 +2441,12 @@ retry:
 	 * handle that later.
 	 */
 	/* Only go through if we didn't race with anybody else... */
-	if (pte_none(*page_table) &&
-	    !unlikely(page_host_discards() && PageDiscarded(new_page))) {
+	if (pte_none(*page_table)
+#ifdef CONFIG_PAGE_STATES
+	    && !unlikely(page_host_discards() && PageDiscarded(new_page))
+#endif
+	   ) {
+		struct user_beancounter *ub;
 		flush_icache_page(vma, new_page);
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
@@ -2353,6 +2461,18 @@ retry:
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
 		}
+		ub = page_ub(new_page);
+		if (ub != NULL && ub->ub_magic == UB_MAGIC) {
+		/* WOW: Page was already charged as page_ub. This may happen,
+		 * for example, when some driver exports its low memory pages
+		 * to user space. We can't account a page as page_ub and page_bp
+		 * at the same time, so uncharge the page from the UB counter.
+		 */
+			WARN_ON(1);
+			ub_page_uncharge(new_page, 0);
+		}
+		pb_add_ref(new_page, mm, &pbc);
+		ub_unused_privvm_dec(mm, vma);
 		if (likely(PageCanTrunc(new_page)))
 			unlock_page(new_page);
 	} else {
@@ -2368,10 +2488,18 @@ retry:
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	pb_free(&pbc);
 	return ret;
 oom:
 	page_cache_release(new_page);
+oom_nopg:
+	pb_free(&pbc);
+oom_nopb:
 	return VM_FAULT_OOM;
+
+bus_nopg:
+	pb_free(&pbc);
+	return VM_FAULT_SIGBUS;
 }
 
 /*
@@ -2553,6 +2681,27 @@ int __handle_mm_fault(struct mm_struct *
 	pmd_t *pmd;
 	pte_t *pte;
 
+#ifdef CONFIG_VZ_GENCALLS
+	do {
+		int ret;
+#ifdef CONFIG_USER_RESOURCE
+		struct task_beancounter *tbc;
+
+		tbc = &current->task_bc;
+		if (!test_bit(UB_AFLAG_NOTIF_PAGEIN, &mm->mm_ub->ub_aflags) &&
+				tbc->pgfault_allot) {
+			tbc->pgfault_allot--;
+			break; /* skip notifier */
+		}
+#endif
+		ret = virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_PAGEIN,
+				(void *)1);
+		if (ret & NOTIFY_FAIL)
+			return VM_FAULT_SIGBUS;
+		if (ret & NOTIFY_OK)
+			return VM_FAULT_MINOR; /* retry */
+	} while (0);
+#endif
 	__set_current_state(TASK_RUNNING);
 
 	inc_page_state(pgfault);
@@ -2603,6 +2752,8 @@ int __pud_alloc(struct mm_struct *mm, pg
 }
 #endif /* __PAGETABLE_PUD_FOLDED */
 
+EXPORT_SYMBOL_GPL(__pud_alloc);
+
 #ifndef __PAGETABLE_PMD_FOLDED
 /*
  * Allocate page middle directory.
@@ -2637,6 +2788,8 @@ int __pmd_alloc(struct mm_struct *mm, pu
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
+EXPORT_SYMBOL_GPL(__pmd_alloc);
+
 int make_pages_present(unsigned long addr, unsigned long end)
 {
 	int ret, len, write;
@@ -2683,6 +2836,8 @@ int make_pages_present(unsigned long add
 	return ret == len ? 0 : -1;
 }
 
+EXPORT_SYMBOL(make_pages_present);
+
 /* 
  * Map a vmalloc()-space virtual address to the physical page.
  */
diff -upr linux-2.6.16.46-0.12.orig/mm/mempolicy.c linux-2.6.16.46-0.12-027test011/mm/mempolicy.c
--- linux-2.6.16.46-0.12.orig/mm/mempolicy.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/mempolicy.c	2007-08-28 17:35:31.000000000 +0400
@@ -965,7 +965,7 @@ asmlinkage long sys_migrate_pages(pid_t 
 
 	/* Find the mm_struct */
 	read_lock(&tasklist_lock);
-	task = pid ? find_task_by_pid(pid) : current;
+	task = pid ? find_task_by_pid_ve(pid) : current;
 	if (!task) {
 		read_unlock(&tasklist_lock);
 		return -ESRCH;
diff -upr linux-2.6.16.46-0.12.orig/mm/mempool.c linux-2.6.16.46-0.12-027test011/mm/mempool.c
--- linux-2.6.16.46-0.12.orig/mm/mempool.c	2007-08-24 19:28:15.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/mempool.c	2007-08-28 17:35:30.000000000 +0400
@@ -14,6 +14,7 @@
 #include <linux/mempool.h>
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
+#include <linux/kmem_cache.h>
 
 static void add_element(mempool_t *pool, void *element)
 {
@@ -78,6 +79,8 @@ mempool_t *mempool_create_node(int min_n
 	init_waitqueue_head(&pool->wait);
 	pool->alloc = alloc_fn;
 	pool->free = free_fn;
+	if (alloc_fn == mempool_alloc_slab)
+		kmem_mark_nocharge((kmem_cache_t *)pool_data);
 
 	/*
 	 * First pre-allocate the guaranteed number of buffers.
@@ -119,6 +122,7 @@ int mempool_resize(mempool_t *pool, int 
 	unsigned long flags;
 
 	BUG_ON(new_min_nr <= 0);
+	gfp_mask &= ~__GFP_UBC;
 
 	spin_lock_irqsave(&pool->lock, flags);
 	if (new_min_nr <= pool->min_nr) {
@@ -212,6 +216,7 @@ void * mempool_alloc(mempool_t *pool, gf
 	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */
 	gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
 	gfp_mask |= __GFP_NOWARN;	/* failures are OK */
+	gfp_mask &= ~__GFP_UBC;
 
 	gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);
 
diff -upr linux-2.6.16.46-0.12.orig/mm/mlock.c linux-2.6.16.46-0.12-027test011/mm/mlock.c
--- linux-2.6.16.46-0.12.orig/mm/mlock.c	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/mlock.c	2007-08-28 17:35:33.000000000 +0400
@@ -8,9 +8,11 @@
 #include <linux/capability.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 
+#include <ub/ub_vmpages.h>
 
 static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	unsigned long start, unsigned long end, unsigned int newflags)
@@ -25,6 +27,14 @@ static int mlock_fixup(struct vm_area_st
 		goto out;
 	}
 
+	if (newflags & VM_LOCKED) {
+		ret = ub_locked_charge(mm, end - start);
+		if (ret < 0) {
+			*prev = vma;
+			goto out;
+		}
+	}
+
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma));
@@ -38,13 +48,13 @@ static int mlock_fixup(struct vm_area_st
 	if (start != vma->vm_start) {
 		ret = split_vma(mm, vma, start, 1);
 		if (ret)
-			goto out;
+			goto out_uncharge;
 	}
 
 	if (end != vma->vm_end) {
 		ret = split_vma(mm, vma, end, 0);
 		if (ret)
-			goto out;
+			goto out_uncharge;
 	}
 
 success:
@@ -65,13 +75,19 @@ success:
 		pages = -pages;
 		if (!(newflags & VM_IO))
 			ret = make_pages_present(start, end);
-	}
+	} else
+		ub_locked_uncharge(mm, end - start);
 
 	vma->vm_mm->locked_vm -= pages;
 out:
 	if (ret == -ENOMEM)
 		ret = -EAGAIN;
 	return ret;
+
+out_uncharge:
+	if (newflags & VM_LOCKED)
+		ub_locked_uncharge(mm, end - start);
+	goto out;
 }
 
 static int do_mlock(unsigned long start, size_t len, int on)
@@ -148,6 +164,7 @@ asmlinkage long sys_mlock(unsigned long 
 	up_write(&current->mm->mmap_sem);
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_mlock);
 
 asmlinkage long sys_munlock(unsigned long start, size_t len)
 {
@@ -160,6 +177,7 @@ asmlinkage long sys_munlock(unsigned lon
 	up_write(&current->mm->mmap_sem);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(sys_munlock);
 
 static int do_mlockall(int flags)
 {
diff -upr linux-2.6.16.46-0.12.orig/mm/mmap.c linux-2.6.16.46-0.12-027test011/mm/mmap.c
--- linux-2.6.16.46-0.12.orig/mm/mmap.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/mmap.c	2007-08-28 17:35:33.000000000 +0400
@@ -25,6 +25,7 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/virtinfo.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -34,9 +35,12 @@
 #define arch_mmap_check(addr, len, flags)	(0)
 #endif
 
+#include <ub/ub_vmpages.h>
+
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end);
+static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft);
 
 /*
  * WARNING: the debugging will use recursive algorithms so never enable this
@@ -91,6 +95,18 @@ int __vm_enough_memory(long pages, int c
 
 	vm_acct_memory(pages);
 
+#ifdef CONFIG_USER_RESOURCE
+	switch (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_ENOUGHMEM,
+				(void *)pages)
+			& (NOTIFY_OK | NOTIFY_FAIL)) {
+		case NOTIFY_OK:
+			return 0;
+		case NOTIFY_FAIL:
+			vm_unacct_memory(pages);
+			return -ENOMEM;
+	}
+#endif
+
 	/*
 	 * Sometimes we want to use more memory than we have
 	 */
@@ -205,6 +221,9 @@ static struct vm_area_struct *remove_vma
 	struct vm_area_struct *next = vma->vm_next;
 
 	might_sleep();
+
+	ub_memory_uncharge(vma->vm_mm, vma->vm_end - vma->vm_start,
+			vma->vm_flags, vma->vm_file);
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file) {
@@ -248,7 +267,7 @@ asmlinkage unsigned long sys_brk(unsigne
 		goto out;
 
 	/* Ok, looks good - let it rip. */
-	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
+	if (__do_brk(oldbrk, newbrk-oldbrk, UB_HARD) != oldbrk)
 		goto out;
 set_brk:
 	mm->brk = brk;
@@ -887,6 +906,7 @@ unsigned long do_mmap_pgoff(struct file 
 	struct rb_node ** rb_link, * rb_parent;
 	int accountable = 1;
 	unsigned long charged = 0, reqprot = prot;
+	unsigned long ub_charged = 0;
 
 	if (file) {
 		if (is_file_hugepages(file))
@@ -910,7 +930,7 @@ unsigned long do_mmap_pgoff(struct file 
 			prot |= PROT_EXEC;
 
 	if (!len)
-		return -EINVAL;
+		return addr;
 
 	error = arch_mmap_check(addr, len, flags);
 	if (error)
@@ -1043,6 +1063,11 @@ munmap_back:
 		}
 	}
 
+	if (ub_memory_charge(mm, len, vm_flags, file,
+				(flags & MAP_EXECPRIO ? UB_SOFT : UB_HARD)))
+		goto charge_error;
+	ub_charged = 1;
+
 	/*
 	 * Can we just expand an old private anonymous mapping?
 	 * The VM_SHARED test is necessary because shmem_zero_setup
@@ -1058,7 +1083,8 @@ munmap_back:
 	 * specific mapper. the address has already been validated, but
 	 * not unmapped, but the maps are removed from the list.
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL |
+			(flags & MAP_EXECPRIO ? __GFP_SOFT_UBC : 0));
 	if (!vma) {
 		error = -ENOMEM;
 		goto unacct_error;
@@ -1087,6 +1113,19 @@ munmap_back:
 		error = file->f_op->mmap(file, vma);
 		if (error)
 			goto unmap_and_free_vma;
+		if (vm_flags != vma->vm_flags) {
+		/*
+		 * The f_op->mmap method has changed ->vm_flags, so the
+		 * charge taken with the old flags must be redone with new ones.
+		 */
+			ub_memory_uncharge(mm, len, vm_flags, file);
+			if (ub_memory_charge(mm, len, vma->vm_flags, file,
+				(flags & MAP_EXECPRIO ? UB_SOFT : UB_HARD))) {
+				ub_charged = 0;
+				error = -ENOMEM;
+				goto unmap_and_free_vma;
+			}
+		}
 	} else if (vm_flags & VM_SHARED) {
 		error = shmem_zero_setup(vma);
 		if (error)
@@ -1152,6 +1191,9 @@ unmap_and_free_vma:
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
+	if (ub_charged)
+		ub_memory_uncharge(mm, len, vm_flags, file);
+charge_error:
 	if (charged)
 		vm_unacct_memory(charged);
 	return error;
@@ -1481,12 +1523,16 @@ static int acct_stack_growth(struct vm_a
 			return -ENOMEM;
 	}
 
+	if (ub_memory_charge(mm, grow << PAGE_SHIFT, vma->vm_flags,
+				vma->vm_file, UB_SOFT))
+		goto fail_charge;
+
 	/*
 	 * Overcommit..  This must be the final test, as it will
 	 * update security statistics.
 	 */
 	if (security_vm_enough_memory(grow))
-		return -ENOMEM;
+		goto fail_sec;
 
 	/* Ok, everything looks good - let it rip */
 	mm->total_vm += grow;
@@ -1494,6 +1540,11 @@ static int acct_stack_growth(struct vm_a
 		mm->locked_vm += grow;
 	vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
 	return 0;
+
+fail_sec:
+	ub_memory_uncharge(mm, grow << PAGE_SHIFT, vma->vm_flags, vma->vm_file);
+fail_charge:
+	return -ENOMEM;
 }
 
 #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
@@ -1756,6 +1807,7 @@ int split_vma(struct mm_struct * mm, str
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(split_vma);
 
 /* Munmap is split into 2 main parts -- this part which finds
  * what needs doing, and the areas themselves, which do the
@@ -1849,7 +1901,7 @@ static inline void verify_mm_writelocked
  *  anonymous maps.  eventually we may be able to do some
  *  brk-specific accounting here.
  */
-unsigned long do_brk(unsigned long addr, unsigned long len)
+static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft)
 {
 	struct mm_struct * mm = current->mm;
 	struct vm_area_struct * vma, * prev;
@@ -1911,8 +1963,11 @@ unsigned long do_brk(unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
+	if (ub_memory_charge(mm, len, flags, NULL, soft))
+		goto fail_charge;
+
 	if (security_vm_enough_memory(len >> PAGE_SHIFT))
-		return -ENOMEM;
+		goto fail_sec;
 
 	/* Can we just expand an old private anonymous mapping? */
 	if (vma_merge(mm, prev, addr, addr + len, flags,
@@ -1922,11 +1977,11 @@ unsigned long do_brk(unsigned long addr,
 	/*
 	 * create a vma struct for an anonymous mapping
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!vma) {
-		vm_unacct_memory(len >> PAGE_SHIFT);
-		return -ENOMEM;
-	}
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL |
+			(soft == UB_SOFT ? __GFP_SOFT_UBC : 0));
+	if (!vma)
+		goto fail_alloc;
+
 	memset(vma, 0, sizeof(*vma));
 
 	vma->vm_mm = mm;
@@ -1943,8 +1998,19 @@ out:
 		make_pages_present(addr, addr + len);
 	}
 	return addr;
+
+fail_alloc:
+	vm_unacct_memory(len >> PAGE_SHIFT);
+fail_sec:
+	ub_memory_uncharge(mm, len, flags, NULL);
+fail_charge:
+	return -ENOMEM;
 }
 
+unsigned long do_brk(unsigned long addr, unsigned long len)
+{
+	return __do_brk(addr, len, UB_SOFT);
+}
 EXPORT_SYMBOL(do_brk);
 
 /* Release all mmaps. */
diff -upr linux-2.6.16.46-0.12.orig/mm/mprotect.c linux-2.6.16.46-0.12-027test011/mm/mprotect.c
--- linux-2.6.16.46-0.12.orig/mm/mprotect.c	2007-08-24 19:28:23.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/mprotect.c	2007-08-28 17:35:33.000000000 +0400
@@ -9,6 +9,7 @@
  */
 
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
 #include <linux/shm.h>
@@ -25,6 +26,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot)
 {
@@ -110,12 +113,20 @@ mprotect_fixup(struct vm_area_struct *vm
 	pgprot_t newprot;
 	pgoff_t pgoff;
 	int error;
+	unsigned long ch_size;
+	int ch_dir;
 
 	if (newflags == oldflags) {
 		*pprev = vma;
 		return 0;
 	}
 
+	error = -ENOMEM;
+	ch_size = nrpages - pages_in_vma_range(vma, start, end);
+	ch_dir = ub_protected_charge(mm, ch_size, newflags, vma);
+	if (ch_dir == PRIVVM_ERROR)
+		goto fail_ch;
+
 	/*
 	 * If we make a private mapping writable we increase our commit;
 	 * but (without finer accounting) cannot reduce our commit if we
@@ -128,7 +139,7 @@ mprotect_fixup(struct vm_area_struct *vm
 		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
 			charged = nrpages;
 			if (security_vm_enough_memory(charged))
-				return -ENOMEM;
+				goto fail_sec;
 			newflags |= VM_ACCOUNT;
 		}
 	}
@@ -170,10 +181,16 @@ success:
 	change_protection(vma, start, end, newprot);
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
+	if (ch_dir == PRIVVM_TO_SHARED)
+		__ub_unused_privvm_dec(mm, ch_size);
 	return 0;
 
 fail:
 	vm_unacct_memory(charged);
+fail_sec:
+	if (ch_dir == PRIVVM_TO_PRIVATE)
+		__ub_unused_privvm_dec(mm, ch_size);
+fail_ch:
 	return error;
 }
 
@@ -286,3 +303,4 @@ out:
 	up_write(&current->mm->mmap_sem);
 	return error;
 }
+EXPORT_SYMBOL_GPL(sys_mprotect);
diff -upr linux-2.6.16.46-0.12.orig/mm/mremap.c linux-2.6.16.46-0.12-027test011/mm/mremap.c
--- linux-2.6.16.46-0.12.orig/mm/mremap.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/mm/mremap.c	2007-08-28 17:35:31.000000000 +0400
@@ -23,6 +23,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
@@ -166,17 +168,21 @@ static unsigned long move_vma(struct vm_
 	unsigned long hiwater_vm;
 	int split = 0;
 
+	if (ub_memory_charge(mm, new_len, vm_flags,
+				vma->vm_file, UB_HARD))
+		goto err;
+
 	/*
 	 * We'd prefer to avoid failure later on in do_munmap:
 	 * which may split one vma into three before unmapping.
 	 */
 	if (mm->map_count >= sysctl_max_map_count - 3)
-		return -ENOMEM;
+		goto err_nomem;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
 	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
 	if (!new_vma)
-		return -ENOMEM;
+		goto err_nomem;
 
 	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
 	if (moved_len < old_len) {
@@ -235,7 +241,13 @@ static unsigned long move_vma(struct vm_
 					   new_addr + new_len);
 	}
 
-	return new_addr;
+	if (new_addr != -ENOMEM)
+		return new_addr;
+
+err_nomem:
+	ub_memory_uncharge(mm, new_len, vm_flags, vma->vm_file);
+err:
+	return -ENOMEM;
 }
 
 /*
@@ -359,7 +371,15 @@ unsigned long do_mremap(unsigned long ad
 			max_addr = vma->vm_next->vm_start;
 		/* can we just expand the current mapping? */
 		if (max_addr - addr >= new_len) {
-			int pages = (new_len - old_len) >> PAGE_SHIFT;
+			unsigned long len;
+			int pages;
+
+			len = new_len - old_len;
+			pages = len >> PAGE_SHIFT;
+			ret = -ENOMEM;
+			if (ub_memory_charge(mm, len, vma->vm_flags,
+						vma->vm_file, UB_HARD))
+				goto out;
 
 			vma_adjust(vma, vma->vm_start,
 				addr + new_len, vma->vm_pgoff, NULL);
diff -upr linux-2.6.16.46-0.12.orig/mm/oom_kill.c linux-2.6.16.46-0.12-027test011/mm/oom_kill.c
--- linux-2.6.16.46-0.12.orig/mm/oom_kill.c	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/oom_kill.c	2007-08-28 17:35:36.000000000 +0400
@@ -23,6 +23,11 @@
 #include <linux/cpuset.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/virtinfo.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_oom.h>
 
 /* #define DEBUG */
 
@@ -170,15 +175,15 @@ static inline int constrained_alloc(stru
  *
  * (not docbooked, we don't want this one cluttering up the manual)
  */
-static struct task_struct *select_bad_process(unsigned long *ppoints)
+struct task_struct *oom_select_bad_process(struct user_beancounter *ub)
 {
 	struct task_struct *g, *p;
 	struct task_struct *chosen = NULL;
 	struct timespec uptime;
-	*ppoints = 0;
+	unsigned long chosen_points = 0;
 
 	do_posix_clock_monotonic_gettime(&uptime);
-	do_each_thread(g, p) {
+	do_each_thread_all(g, p) {
 		unsigned long points;
 		int releasing;
 
@@ -190,6 +195,8 @@ static struct task_struct *select_bad_pr
 		/* If p's nodes don't overlap ours, it won't help to kill p. */
 		if (!cpuset_excl_nodes_overlap(p))
 			continue;
+		if (ub_oom_task_skip(ub, p))
+			continue;
 
 		/*
 		 * This is in the process of releasing memory so for wait it
@@ -203,11 +210,11 @@ static struct task_struct *select_bad_pr
 			return p;
 
 		points = badness(p, uptime.tv_sec);
-		if (points > *ppoints || !chosen) {
+		if (points > chosen_points || !chosen) {
 			chosen = p;
-			*ppoints = points;
+			chosen_points = points;
 		}
-	} while_each_thread(g, p);
+	} while_each_thread_all(g, p);
 	return chosen;
 }
 
@@ -244,11 +251,13 @@ static void __oom_kill_task(task_t *p, c
 	set_tsk_thread_flag(p, TIF_MEMDIE);
 
 	force_sig(SIGKILL, p);
+	ub_oom_task_killed(p);
 }
 
 static struct mm_struct *oom_kill_task(task_t *p, const char *message)
 {
 	struct mm_struct *mm = get_task_mm(p);
+	struct user_beancounter *ub;
 	task_t * g, * q;
 
 	if (!mm)
@@ -258,15 +267,17 @@ static struct mm_struct *oom_kill_task(t
 		return NULL;
 	}
 
 	/*
 	 * Don't kill the process if any threads are set to OOM_DISABLE
 	 */
-	do_each_thread(g, q) {
+	do_each_thread_all(g, q) {
 		if (q->mm == mm && q->oomkilladj == OOM_DISABLE) {
 			mmput(mm);
 			return NULL;
 		}
-	} while_each_thread(g, q);
+	} while_each_thread_all(g, q);
 
+	/* Take the ub reference after the bail-out above, which has no put. */
+	ub = get_beancounter(mm_ub(mm));
 	__oom_kill_task(p, message);
 
@@ -274,23 +285,24 @@ static struct mm_struct *oom_kill_task(t
 	 * kill all processes that share the ->mm (i.e. all threads),
 	 * but are in a different thread group
 	 */
-	do_each_thread(g, q) {
+	do_each_thread_all(g, q) {
 		if (q->mm == mm && q->tgid != p->tgid)
 			__oom_kill_task(q, message);
-	} while_each_thread(g, q);
+	} while_each_thread_all(g, q);
 
+	ub_oom_mm_killed(ub);
+	put_beancounter(ub);
 	return mm;
 }
 
-static struct mm_struct *oom_kill_process(struct task_struct *p,
-				unsigned long points, const char *message)
+struct mm_struct *oom_kill_process(struct task_struct *p, const char *message)
 {
  	struct mm_struct *mm;
 	struct task_struct *c;
 	struct list_head *tsk;
 
-	printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and "
-		"children.\n", p->pid, p->comm, points);
+	printk(KERN_ERR "Out of Memory: Kill process %d (%s) and children.\n",
+			p->pid, p->comm);
 	/* Try to kill a child first */
 	list_for_each(tsk, &p->children) {
 		c = list_entry(tsk, struct task_struct, sibling);
@@ -329,22 +341,31 @@ void out_of_memory(struct zonelist *zone
 {
 	struct mm_struct *mm = NULL;
 	task_t *p;
-	unsigned long points = 0;
 	unsigned long freed = 0;
+	struct user_beancounter *ub;
 
 	notifier_call_chain(&oom_notify_list, 0, &freed);
 	if (freed > 0)
 		/* Got some memory back in the last second. */
 		return;
 
+	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_OUTOFMEM, NULL)
+			& (NOTIFY_OK | NOTIFY_FAIL))
+		return;
+
+	cpuset_lock();
+	ub = NULL;
+	if (ub_oom_lock())
+		goto out_cpuset;
+
 	if (printk_ratelimit()) {
 		printk("oom-killer: gfp_mask=0x%x, order=%d\n",
 			gfp_mask, order);
 		dump_stack();
 		show_mem();
+		show_slab_info();
 	}
 
-	cpuset_lock();
 	read_lock(&tasklist_lock);
 
 	/*
@@ -353,34 +374,38 @@ void out_of_memory(struct zonelist *zone
 	 */
 	switch (constrained_alloc(zonelist, gfp_mask)) {
 	case CONSTRAINT_MEMORY_POLICY:
-		mm = oom_kill_process(current, points,
-				"No available memory (MPOL_BIND)");
+		mm = oom_kill_process(current, "No available memory (MPOL_BIND)");
 		break;
 
 	case CONSTRAINT_CPUSET:
-		mm = oom_kill_process(current, points,
-				"No available memory in cpuset");
+		mm = oom_kill_process(current, "No available memory in cpuset");
 		break;
 
 	case CONSTRAINT_NONE:
 retry:
+		put_beancounter(ub);
+
 		/*
 		 * Rambo mode: Shoot down a process and hope it solves whatever
 		 * issues we may have.
 		 */
-		p = select_bad_process(&points);
+		ub = ub_oom_select_worst();
+		p = oom_select_bad_process(ub);
 
 		if (PTR_ERR(p) == -1UL)
 			goto out;
 
 		/* Found nothing?!?! Either we hang forever, or we panic. */
 		if (!p) {
+			if (ub != NULL)
+				goto retry;
+
 			read_unlock(&tasklist_lock);
 			cpuset_unlock();
 			panic("Out of memory and no killable processes...\n");
 		}
 
-		mm = oom_kill_process(p, points, "Out of memory");
+		mm = oom_kill_process(p, "Out of memory");
 		if (!mm)
 			goto retry;
 
@@ -389,6 +414,9 @@ retry:
 
 out:
 	read_unlock(&tasklist_lock);
+	ub_oom_unlock();
+	put_beancounter(ub);
+out_cpuset:
 	cpuset_unlock();
 	if (mm)
 		mmput(mm);
diff -upr linux-2.6.16.46-0.12.orig/mm/page-writeback.c linux-2.6.16.46-0.12-027test011/mm/page-writeback.c
--- linux-2.6.16.46-0.12.orig/mm/page-writeback.c	2007-08-24 19:28:30.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/page-writeback.c	2007-08-28 17:35:31.000000000 +0400
@@ -21,6 +21,7 @@
 #include <linux/writeback.h>
 #include <linux/init.h>
 #include <linux/backing-dev.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/blkdev.h>
 #include <linux/mpage.h>
 #include <linux/percpu.h>
@@ -30,6 +31,8 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 
+#include <ub/io_acct.h>
+
 /*
  * The maximum number of pages to writeout in a single bdflush/kupdate
  * operation.  We do this so we don't hold I_LOCK against an inode for
@@ -631,7 +634,9 @@ EXPORT_SYMBOL(write_one_page);
 int __set_page_dirty_nobuffers(struct page *page)
 {
 	int ret = 0;
+	int acct;
 
+	acct = 0;
 	if (!TestSetPageDirty(page)) {
 		struct address_space *mapping = page_mapping(page);
 		struct address_space *mapping2;
@@ -641,8 +646,10 @@ int __set_page_dirty_nobuffers(struct pa
 			mapping2 = page_mapping(page);
 			if (mapping2) { /* Race with truncate? */
 				BUG_ON(mapping2 != mapping);
-				if (mapping_cap_account_dirty(mapping))
+				if (mapping_cap_account_dirty(mapping)) {
 					inc_page_state(nr_dirty);
+					acct = 1;
+				}
 				radix_tree_tag_set(&mapping->page_tree,
 					page_index(page), PAGECACHE_TAG_DIRTY);
 			}
@@ -652,6 +659,8 @@ int __set_page_dirty_nobuffers(struct pa
 				__mark_inode_dirty(mapping->host,
 							I_DIRTY_PAGES);
 			}
+			if (acct)
+				task_io_account_write(page, PAGE_CACHE_SIZE, 0);
 		}
 	}
 	return ret;
@@ -747,8 +756,10 @@ int test_clear_page_dirty(struct page *p
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
 			write_unlock_irqrestore(&mapping->tree_lock, flags);
-			if (mapping_cap_account_dirty(mapping))
+			if (mapping_cap_account_dirty(mapping)) {
 				dec_page_state(nr_dirty);
+				ub_io_release_context(page, 0);
+			}
 			return 1;
 		}
 		write_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -778,8 +789,10 @@ int clear_page_dirty_for_io(struct page 
 
 	if (mapping) {
 		if (TestClearPageDirty(page)) {
-			if (mapping_cap_account_dirty(mapping))
+			if (mapping_cap_account_dirty(mapping)) {
 				dec_page_state(nr_dirty);
+				ub_io_release_context(page, PAGE_CACHE_SIZE);
+			}
 			return 1;
 		}
 		return 0;
diff -upr linux-2.6.16.46-0.12.orig/mm/page_alloc.c linux-2.6.16.46-0.12-027test011/mm/page_alloc.c
--- linux-2.6.16.46-0.12.orig/mm/page_alloc.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/page_alloc.c	2007-08-28 17:35:36.000000000 +0400
@@ -41,6 +41,9 @@
 #include <asm/tlbflush.h>
 #include "internal.h"
 
+#include <ub/ub_mem.h>
+#include <ub/io_acct.h>
+
 /*
  * MCD - HACK: Find somewhere to initialize this EARLY, or make this
  * initializer cleaner
@@ -50,6 +53,7 @@ EXPORT_SYMBOL(node_online_map);
 nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 struct pglist_data *pgdat_list __read_mostly;
+EXPORT_SYMBOL(pgdat_list);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
@@ -71,6 +75,7 @@ static void __free_pages_ok(struct page 
  */
 int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
 
+EXPORT_SYMBOL(nr_swap_pages);
 EXPORT_SYMBOL(totalram_pages);
 
 /*
@@ -156,12 +161,14 @@ static void bad_page(struct page *page)
 			1 << PG_cantrunc|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_buddy |
+#ifdef CONFIG_PAGE_STATES
+			1 << PG_foreign |
+			1 << PG_discarded |
+#endif
 #ifdef CONFIG_X86_XEN
 			1 << PG_pinned |
 #endif
-			1 << PG_foreign |
-			1 << PG_discarded );
+			1 << PG_buddy);
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
 	page->mapping = NULL;
@@ -375,14 +382,19 @@ static inline int free_pages_check(struc
 			1 << PG_swapcache |
 			1 << PG_writeback |
 			1 << PG_reserved |
-			1 << PG_buddy	|
+#ifdef CONFIG_PAGE_STATES
+			1 << PG_foreign |
+#endif
 #ifdef CONFIG_X86_XEN
 			1 << PG_pinned	|
 #endif
-			1 << PG_foreign ))))
+			1 << PG_buddy))))
 		bad_page(page);
-	if (PageDirty(page))
+	if (PageDirty(page)) {
+		ub_io_release_context(page, 0);
 		__ClearPageDirty(page);
+	} else
+		ub_io_release_debug(page);
 	if (PageTruncate(page))
 		__ClearPageTruncate(page);
 	if (PageCanTrunc(page))
@@ -455,6 +467,7 @@ static void __free_pages_ok(struct page 
 	page_set_unused(page, order);
 
 	kernel_map_pages(page, 1 << order, 0);
+	ub_page_uncharge(page, order);
 	local_irq_save(flags);
 	__mod_page_state(pgfree, 1 << order);
 	free_one_page(page_zone(page), page, order);
@@ -546,11 +559,13 @@ static int prep_new_page(struct page *pa
 			1 << PG_swapcache |
 			1 << PG_writeback |
 			1 << PG_reserved |
-			1 << PG_buddy	|
+#ifdef CONFIG_PAGE_STATES
+			1 << PG_foreign |
+#endif
 #ifdef CONFIG_X86_XEN
 			1 << PG_pinned	|
 #endif
-			1 << PG_foreign ))))
+			1 << PG_buddy))))
 		bad_page(page);
 
 	/*
@@ -563,7 +578,10 @@ static int prep_new_page(struct page *pa
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
 			1 << PG_referenced | 1 << PG_arch_1 |
 			1 << PG_checked | 1 << PG_mappedtodisk |
-			1 << PG_writable);
+#ifdef CONFIG_PAGE_STATES
+			1 << PG_writable |
+#endif
+			1 << PG_checkpointed);
 	set_page_private(page, 0);
 	set_page_refs(page, order);
 	kernel_map_pages(page, 1 << order, 1);
@@ -814,6 +832,7 @@ static void fastcall free_hot_cold_page(
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+	ub_page_uncharge(page, 0);
 	local_irq_save(flags);
 	__inc_page_state(pgfree);
 	list_add(&page->lru, &pcp->list);
@@ -988,6 +1007,26 @@ get_page_from_freelist(gfp_t gfp_mask, u
 	return page;
 }
 
+static void __alloc_collect_stats(gfp_t gfp_mask, unsigned int order,
+		struct page *page, cycles_t time)
+{
+	int ind;	/* slot in kstat_glob.alloc_lat[] and alloc_fails[] */
+	unsigned long flags;
+
+	time = (jiffies - time) * cycles_per_jiffy; /* jiffies stamp -> latency */
+	if (!(gfp_mask & __GFP_WAIT))
+		ind = 0;			/* __GFP_WAIT not set */
+	else if (!(gfp_mask & __GFP_HIGHMEM))
+		ind = (order > 0 ? 2 : 1);	/* no __GFP_HIGHMEM: 1 if order 0 */
+	else
+		ind = (order > 0 ? 4 : 3);	/* __GFP_HIGHMEM set: 3 if order 0 */
+	spin_lock_irqsave(&kstat_glb_lock, flags);
+	KSTAT_LAT_ADD(&kstat_glob.alloc_lat[ind], time);
+	if (!page)
+		kstat_glob.alloc_fails[ind]++;	/* NULL page is a failed request */
+	spin_unlock_irqrestore(&kstat_glb_lock, flags);
+}
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
@@ -1003,6 +1042,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned i
 	int do_retry;
 	int alloc_flags;
 	int did_some_progress;
+	cycles_t start;
 
 	might_sleep_if(wait);
 
@@ -1014,6 +1054,7 @@ restart:
 		return NULL;
 	}
 
+	start = jiffies;
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
 				zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
 	if (page)
@@ -1053,6 +1094,7 @@ restart:
 	if (page)
 		goto got_pg;
 
+rebalance:
 	/* This allocation should allow future memory freeing. */
 
 	if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
@@ -1076,7 +1118,6 @@ nofail_alloc:
 	if (!wait)
 		goto nopage;
 
-rebalance:
 	cond_resched();
 
 	/* We now go into synchronous reclaim */
@@ -1133,6 +1174,7 @@ rebalance:
 	}
 
 nopage:
+	__alloc_collect_stats(gfp_mask, order, NULL, start);
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
 		if (!wait) {
 			printk(KERN_INFO "The following is only an harmless informational message.\n");
@@ -1146,7 +1188,14 @@ nopage:
 		dump_stack();
 		show_mem();
 	}
+	return NULL;
+
 got_pg:
+	__alloc_collect_stats(gfp_mask, order, page, start);
+	if (ub_page_charge(page, order, gfp_mask)) {
+		__free_pages(page, order);
+		page = NULL;
+	}
 	return page;
 }
 
@@ -1230,10 +1279,17 @@ unsigned int nr_free_pages(void)
 
 EXPORT_SYMBOL(nr_free_pages);
 
-#ifdef CONFIG_LKCD_DUMP_MODULE
-/* This symbol has to be exported to use 'for_each_pgdat' macro by modules. */
-EXPORT_SYMBOL_GPL(pgdat_list);
-#endif
+unsigned int nr_free_lowpages (void)
+{
+	pg_data_t *pgdat;
+	unsigned int pages = 0;
+	/* Sum the free page count of ZONE_NORMAL over every pgdat. */
+	for_each_pgdat(pgdat)
+		pages += pgdat->node_zones[ZONE_NORMAL].free_pages;
+	/* NOTE(review): other low zones (e.g. ZONE_DMA) are not counted - confirm */
+	return pages;
+}
+EXPORT_SYMBOL(nr_free_lowpages);
 
 
 #ifdef CONFIG_NUMA
@@ -1410,6 +1466,8 @@ void mod_page_state_offset(unsigned long
 }
 EXPORT_SYMBOL(mod_page_state_offset);
 
+#include <linux/virtinfo.h>
+
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat)
 {
@@ -2486,9 +2544,19 @@ static void *vmstat_start(struct seq_fil
 	m->private = ps;
 	if (!ps)
 		return ERR_PTR(-ENOMEM);
-	get_full_page_state(ps);
+	if (ve_is_super(get_exec_env()))
+		get_full_page_state(ps);
+	else
+		memset(ps, 0, sizeof(*ps));
 	ps->pgpgin /= 2;		/* sectors -> kbytes */
 	ps->pgpgout /= 2;
+	if (virtinfo_notifier_call(VITYPE_GENERAL,
+				VIRTINFO_VMSTAT, ps) & NOTIFY_FAIL) {
+		kfree(ps);
+		m->private = NULL;
+		return ERR_PTR(-ENOMSG);
+	}
+
 	return (unsigned long *)ps + *pos;
 }
 
diff -upr linux-2.6.16.46-0.12.orig/mm/readahead.c linux-2.6.16.46-0.12-027test011/mm/readahead.c
--- linux-2.6.16.46-0.12.orig/mm/readahead.c	2007-08-24 19:28:31.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/readahead.c	2007-08-28 17:35:30.000000000 +0400
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/pagevec.h>
 
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
@@ -144,6 +145,7 @@ int read_cache_pages(struct address_spac
 			page_cache_release(page);
 			continue;
 		}
+		task_io_account_read(PAGE_CACHE_SIZE);
 		ret = filler(data, page);
 		if (!pagevec_add(&lru_pvec, page))
 			__pagevec_lru_add(&lru_pvec);
diff -upr linux-2.6.16.46-0.12.orig/mm/rmap.c linux-2.6.16.46-0.12-027test011/mm/rmap.c
--- linux-2.6.16.46-0.12.orig/mm/rmap.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/rmap.c	2007-08-28 17:35:33.000000000 +0400
@@ -54,8 +54,11 @@
 #include <linux/rcupdate.h>
 #include <linux/module.h>
 
+#include <ub/beancounter.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_vmpages.h>
+
 //#define RMAP_DEBUG /* can be enabled only for debugging */
 
 kmem_cache_t *anon_vma_cachep;
@@ -117,6 +120,7 @@ int anon_vma_prepare(struct vm_area_stru
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(anon_vma_prepare);
 
 void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
 {
@@ -145,6 +149,7 @@ void anon_vma_link(struct vm_area_struct
 		spin_unlock(&anon_vma->lock);
 	}
 }
+EXPORT_SYMBOL_GPL(anon_vma_link);
 
 void anon_vma_unlink(struct vm_area_struct *vma)
 {
@@ -180,14 +185,15 @@ static void anon_vma_ctor(void *data, km
 void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
-			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor, NULL);
+			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_UBC,
+			anon_vma_ctor, NULL);
 }
 
 /*
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma rely on RCU to guard against the races.
  */
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
 {
 	struct anon_vma *anon_vma = NULL;
 	unsigned long anon_mapping;
@@ -205,6 +211,7 @@ out:
 	rcu_read_unlock();
 	return anon_vma;
 }
+EXPORT_SYMBOL_GPL(page_lock_anon_vma);
 
 #ifdef CONFIG_MIGRATION
 /*
@@ -220,6 +227,7 @@ void remove_from_swap(struct page *page)
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
 	unsigned long mapping;
+	struct page_beancounter *pb;
 
 	if (!PageSwapCache(page))
 		return;
@@ -229,6 +237,10 @@ void remove_from_swap(struct page *page)
 	if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
 		return;
 
+	pb = NULL;
+	if (pb_alloc_all(&pb))
+		return;
+
 	/*
 	 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
 	 */
@@ -236,10 +248,12 @@ void remove_from_swap(struct page *page)
 	spin_lock(&anon_vma->lock);
 
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
-		remove_vma_swap(vma, page);
+		remove_vma_swap(vma, page, &pb);
 
 	spin_unlock(&anon_vma->lock);
 	delete_from_swap_cache(page);
+
+	pb_free_list(&pb);
 }
 EXPORT_SYMBOL(remove_from_swap);
 #endif
@@ -582,6 +596,13 @@ void page_remove_rmap(struct page *page)
 			page_clear_dirty(page);
 			set_page_dirty(page);
 		}
+
+		/*
+		 * Once a page is unmapped we cannot keep the PG_checkpointed
+		 * flag: the page is no longer accessible via the process VM,
+		 * so we would have no way to reset its state.
+		 */
+		ClearPageCheckpointed(page);
 		if (PageAnon(page))
 			__dec_page_state(nr_anon);
 		else
@@ -622,7 +643,10 @@ static int try_to_unmap_one(struct page 
 		 * and reload them. That happens after the VM_LOCKED bit
 		 * has been set.
 		 */
-		if (likely(!page_host_discards() || !PageDiscarded(page))) {
+#ifdef CONFIG_PAGE_STATES
+		if (likely(!page_host_discards() || !PageDiscarded(page)))
+#endif
+		{
 			ret = SWAP_FAIL;
 			goto out_unmap;
 		}
@@ -666,6 +690,9 @@ static int try_to_unmap_one(struct page 
 	}
 
 	page_remove_rmap(page);
+	ub_unused_privvm_inc(mm, vma);
+	ub_percpu_inc(mm->mm_ub, unmap);
+	pb_remove_ref(page, mm);
 	page_cache_release_nocheck(page);
 
 out_unmap:
@@ -756,6 +783,9 @@ static void try_to_unmap_cluster(unsigne
 			set_page_dirty_mapping(page);
 
 		page_remove_rmap(page);
+		ub_percpu_inc(mm->mm_ub, unmap);
+		pb_remove_ref(page, mm);
+		ub_unused_privvm_inc(mm, vma);
 		page_cache_release(page);
 		dec_mm_counter(mm, file_rss);
 		(*mapcount)--;
diff -upr linux-2.6.16.46-0.12.orig/mm/shmem.c linux-2.6.16.46-0.12-027test011/mm/shmem.c
--- linux-2.6.16.46-0.12.orig/mm/shmem.c	2007-08-24 19:28:31.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/shmem.c	2007-08-28 17:35:33.000000000 +0400
@@ -52,6 +52,8 @@
 #include <asm/div64.h>
 #include <asm/pgtable.h>
 
+#include <ub/ub_vmpages.h>
+
 /* This magic number is used in glibc for posix shared memory */
 #define TMPFS_MAGIC	0x01021994
 
@@ -214,7 +216,7 @@ static void shmem_free_blocks(struct ino
  *
  * It has to be called with the spinlock held.
  */
-static void shmem_recalc_inode(struct inode *inode)
+static void shmem_recalc_inode(struct inode *inode, long swp_freed)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	long freed;
@@ -224,6 +226,8 @@ static void shmem_recalc_inode(struct in
 		info->alloced -= freed;
 		shmem_unacct_blocks(info->flags, freed);
 		shmem_free_blocks(inode, freed);
+		if (freed > swp_freed)
+			ub_tmpfs_respages_sub(info, freed - swp_freed);
 	}
 }
 
@@ -329,6 +333,11 @@ static void shmem_swp_set(struct shmem_i
 		struct page *page = kmap_atomic_to_page(entry);
 		set_page_private(page, page_private(page) + incdec);
 	}
+
+	if (incdec == 1)
+		ub_tmpfs_respages_dec(info);
+	else
+		ub_tmpfs_respages_inc(info);
 }
 
 /*
@@ -345,14 +354,24 @@ static swp_entry_t *shmem_swp_alloc(stru
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	struct page *page = NULL;
 	swp_entry_t *entry;
+	unsigned long ub_val;
 
 	if (sgp != SGP_WRITE &&
 	    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 		return ERR_PTR(-EINVAL);
 
+	ub_val = 0;
+	if (info->next_index <= index) {
+		ub_val = index + 1 - info->next_index;
+		if (ub_shmpages_charge(info, ub_val))
+			return ERR_PTR(-ENOSPC);
+	}
+
 	while (!(entry = shmem_swp_entry(info, index, &page))) {
-		if (sgp == SGP_READ)
-			return shmem_swp_map(ZERO_PAGE(0));
+		if (sgp == SGP_READ) {
+			entry = shmem_swp_map(ZERO_PAGE(0));
+			goto out;
+		}
 		/*
 		 * Test free_blocks against 1 not 0, since we have 1 data
 		 * page (and perhaps indirect index pages) yet to allocate:
@@ -362,7 +381,8 @@ static swp_entry_t *shmem_swp_alloc(stru
 			spin_lock(&sbinfo->stat_lock);
 			if (sbinfo->free_blocks <= 1) {
 				spin_unlock(&sbinfo->stat_lock);
-				return ERR_PTR(-ENOSPC);
+				entry = ERR_PTR(-ENOSPC);
+				goto out;
 			}
 			sbinfo->free_blocks--;
 			inode->i_blocks += BLOCKS_PER_PAGE;
@@ -370,31 +390,43 @@ static swp_entry_t *shmem_swp_alloc(stru
 		}
 
 		spin_unlock(&info->lock);
-		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
+		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) |
+				__GFP_ZERO | __GFP_UBC);
 		if (page)
 			set_page_private(page, 0);
 		spin_lock(&info->lock);
 
 		if (!page) {
-			shmem_free_blocks(inode, 1);
-			return ERR_PTR(-ENOMEM);
+			entry = ERR_PTR(-ENOMEM);
+			goto out_block;
 		}
 		if (sgp != SGP_WRITE &&
 		    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 			entry = ERR_PTR(-EINVAL);
-			break;
+			goto out_dir;
 		}
-		if (info->next_index <= index)
+		if (info->next_index <= index) {
+			ub_val = 0;
 			info->next_index = index + 1;
+		}
 	}
 	if (page) {
 		/* another task gave its page, or truncated the file */
 		shmem_free_blocks(inode, 1);
 		shmem_dir_free(page);
 	}
-	if (info->next_index <= index && !IS_ERR(entry))
+	if (info->next_index <= index)
 		info->next_index = index + 1;
 	return entry;
+
+out_dir:
+	shmem_dir_free(page);
+out_block:
+	shmem_free_blocks(inode, 1);
+out:
+	if (ub_val)
+		ub_shmpages_uncharge(info, ub_val);
+	return entry;
 }
 
 /*
@@ -503,6 +535,7 @@ static void shmem_truncate_range(struct 
 		return;
 
 	spin_lock(&info->lock);
+	ub_shmpages_uncharge(info, info->next_index - idx);
 	info->flags |= SHMEM_TRUNCATE;
 	if (likely(end == (loff_t) -1)) {
 		limit = info->next_index;
@@ -681,7 +714,7 @@ done2:
 	info->swapped -= nr_swaps_freed;
 	if (nr_pages_to_free)
 		shmem_free_blocks(inode, nr_pages_to_free);
-	shmem_recalc_inode(inode);
+	shmem_recalc_inode(inode, nr_swaps_freed);
 	spin_unlock(&info->lock);
 
 	/*
@@ -770,6 +803,7 @@ static void shmem_delete_inode(struct in
 		sbinfo->free_inodes++;
 		spin_unlock(&sbinfo->stat_lock);
 	}
+	shmi_ub_put(info);
 	clear_inode(inode);
 }
 
@@ -891,6 +925,12 @@ int shmem_unuse(swp_entry_t entry, struc
 	return found;
 }
 
+#ifdef CONFIG_USER_RESOURCE
+#define shm_get_swap_page(info)	(get_swap_page((info)->shmi_ub))
+#else
+#define shm_get_swap_page(info)	(get_swap_page(NULL))
+#endif
+
 /*
  * Move the page from the page cache to the swap cache.
  */
@@ -911,12 +951,12 @@ static int shmem_writepage(struct page *
 	info = SHMEM_I(inode);
 	if (info->flags & VM_LOCKED)
 		goto redirty;
-	swap = get_swap_page();
+	swap = shm_get_swap_page(info);
 	if (!swap.val)
 		goto redirty;
 
 	spin_lock(&info->lock);
-	shmem_recalc_inode(inode);
+	shmem_recalc_inode(inode, 0);
 	if (index >= info->next_index) {
 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
 		goto unlock;
@@ -1104,7 +1144,7 @@ repeat:
 		goto failed;
 
 	spin_lock(&info->lock);
-	shmem_recalc_inode(inode);
+	shmem_recalc_inode(inode, 0);
 	entry = shmem_swp_alloc(info, idx, sgp);
 	if (IS_ERR(entry)) {
 		spin_unlock(&info->lock);
@@ -1286,6 +1326,7 @@ repeat:
 		spin_unlock(&info->lock);
 		flush_dcache_page(filepage);
 		SetPageUptodate(filepage);
+		ub_tmpfs_respages_inc(info);
 	}
 done:
 	if (*pagep != filepage) {
@@ -1387,28 +1428,6 @@ shmem_get_policy(struct vm_area_struct *
 }
 #endif
 
-int shmem_lock(struct file *file, int lock, struct user_struct *user)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct shmem_inode_info *info = SHMEM_I(inode);
-	int retval = -ENOMEM;
-
-	spin_lock(&info->lock);
-	if (lock && !(info->flags & VM_LOCKED)) {
-		if (!user_shm_lock(inode->i_size, user))
-			goto out_nomem;
-		info->flags |= VM_LOCKED;
-	}
-	if (!lock && (info->flags & VM_LOCKED) && user) {
-		user_shm_unlock(inode->i_size, user);
-		info->flags &= ~VM_LOCKED;
-	}
-	retval = 0;
-out_nomem:
-	spin_unlock(&info->lock);
-	return retval;
-}
-
 int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	file_accessed(file);
@@ -1445,6 +1464,7 @@ shmem_get_inode(struct super_block *sb, 
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		info = SHMEM_I(inode);
 		memset(info, 0, (char *)inode - (char *)info);
+		shmi_ub_set(info, get_exec_ub());
 		spin_lock_init(&info->lock);
 		INIT_LIST_HEAD(&info->swaplist);
 
@@ -2389,6 +2409,10 @@ static struct vm_operations_struct shmem
 #endif
 };
 
+int is_shmem_mapping(struct address_space *map)
+{
+	return map && map->a_ops == &shmem_aops;	/* NULL is never shmem */
+}
 
 static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
@@ -2396,13 +2420,19 @@ static struct super_block *shmem_get_sb(
 	return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
 }
 
-static struct file_system_type tmpfs_fs_type = {
+struct file_system_type tmpfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "tmpfs",
 	.get_sb		= shmem_get_sb,
 	.kill_sb	= kill_litter_super,
 };
+EXPORT_SYMBOL(tmpfs_fs_type);
+
+#ifdef CONFIG_VE
+#define shm_mnt	(get_exec_env()->shmem_mnt)
+#else
 static struct vfsmount *shm_mnt;
+#endif
 
 static int __init init_tmpfs(void)
 {
@@ -2439,6 +2469,36 @@ out3:
 }
 module_init(init_tmpfs)
 
+/*
+ * Touch the swap-entry slot of the last page of @inode up front, so that
+ * shmem_swp_alloc() allocates space for it and makes all UBC checks now.
+ *
+ * Returns 0 on success and when CONFIG_USER_RESOURCE is off, otherwise the
+ * error code carried by the ERR_PTR that shmem_swp_alloc() handed back.
+ */
+static inline int shm_charge_ahead(struct inode *inode)
+{
+	int err = 0;
+#ifdef CONFIG_USER_RESOURCE
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	unsigned long last;
+	swp_entry_t *slot;
+
+	if (inode->i_size == 0)
+		return 0;
+	last = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
+
+	spin_lock(&info->lock);
+	slot = shmem_swp_alloc(info, last, SGP_CACHE);
+	if (IS_ERR(slot))
+		err = PTR_ERR(slot);
+	else
+		shmem_swp_unmap(slot);	/* only the side effects were wanted */
+	spin_unlock(&info->lock);
+#endif
+	return err;
+}
+
 /*
  * shmem_file_setup - get an unlinked file living in tmpfs
  *
@@ -2486,6 +2546,10 @@ struct file *shmem_file_setup(char *name
 	d_instantiate(dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;	/* It is unlinked */
+	error = shm_charge_ahead(inode);
+	if (error)
+		goto close_file;
+
 	file->f_vfsmnt = mntget(shm_mnt);
 	file->f_dentry = dentry;
 	file->f_mapping = inode->i_mapping;
@@ -2501,6 +2565,7 @@ put_memory:
 	shmem_unacct_size(flags, size);
 	return ERR_PTR(error);
 }
+EXPORT_SYMBOL_GPL(shmem_file_setup);
 
 /*
  * shmem_zero_setup - setup a shared anonymous mapping
@@ -2518,6 +2583,8 @@ int shmem_zero_setup(struct vm_area_stru
 
 	if (vma->vm_file)
 		fput(vma->vm_file);
+	else if (vma->vm_flags & VM_WRITE)
+		__ub_unused_privvm_dec(vma->vm_mm, size >> PAGE_SHIFT);
 	vma->vm_file = file;
 	vma->vm_ops = &shmem_vm_ops;
 	return 0;
diff -upr linux-2.6.16.46-0.12.orig/mm/slab.c linux-2.6.16.46-0.12-027test011/mm/slab.c
--- linux-2.6.16.46-0.12.orig/mm/slab.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/slab.c	2007-08-28 17:35:36.000000000 +0400
@@ -106,32 +106,19 @@
 #include	<linux/nodemask.h>
 #include	<linux/mempolicy.h>
 #include	<linux/mutex.h>
+#include	<linux/kmem_slab.h>
+#include	<linux/kmem_cache.h>
 
 #include	<asm/uaccess.h>
 #include	<asm/cacheflush.h>
 #include	<asm/tlbflush.h>
 #include	<asm/page.h>
 
-/*
- * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
- *		  SLAB_RED_ZONE & SLAB_POISON.
- *		  0 for faster, smaller code (especially in the critical paths).
- *
- * STATS	- 1 to collect stats for /proc/slabinfo.
- *		  0 for faster, smaller code (especially in the critical paths).
- *
- * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
- */
+#include	<ub/ub_mem.h>
 
-#ifdef CONFIG_DEBUG_SLAB
-#define	DEBUG		1
-#define	STATS		1
-#define	FORCED_DEBUG	1
-#else
-#define	DEBUG		0
-#define	STATS		0
-#define	FORCED_DEBUG	0
-#endif
+#define DEBUG		SLAB_DEBUG
+#define STATS		SLAB_STATS
+#define FORCED_DEBUG	SLAB_FORCED_DEBUG
 
 /* Shouldn't this be in a header file somewhere? */
 #define	BYTES_PER_WORD		sizeof(void *)
@@ -174,134 +161,22 @@
 			 SLAB_NO_REAP | SLAB_CACHE_DMA | \
 			 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
+			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
+			 SLAB_UBC | SLAB_NO_CHARGE)
 #else
 # define CREATE_MASK	(SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
 			 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
+			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
+			 SLAB_UBC | SLAB_NO_CHARGE)
 #endif
 
-/*
- * kmem_bufctl_t:
- *
- * Bufctl's are used for linking objs within a slab
- * linked offsets.
- *
- * This implementation relies on "struct page" for locating the cache &
- * slab an object belongs to.
- * This allows the bufctl structure to be small (one int), but limits
- * the number of objects a slab (not a cache) can contain when off-slab
- * bufctls are used. The limit is the size of the largest general cache
- * that does not use off-slab slabs.
- * For 32bit archs with 4 kB pages, is this 56.
- * This is not serious, as it is only for large objects, when it is unwise
- * to have too many per slab.
- * Note: This limit can be raised by introducing a general cache whose size
- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
- */
-
-typedef unsigned int kmem_bufctl_t;
-#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
-#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-2)
-
 /* Max number of objs-per-slab for caches which use off-slab slabs.
  * Needed to avoid a possible looping condition in cache_grow().
  */
 static unsigned long offslab_limit;
 
 /*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-struct slab {
-	struct list_head list;
-	unsigned long colouroff;
-	void *s_mem;		/* including colour offset */
-	unsigned int inuse;	/* num of objs active in slab */
-	kmem_bufctl_t free;
-	unsigned short nodeid;
-};
-
-/*
- * struct slab_rcu
- *
- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
- * arrange for kmem_freepages to be called via RCU.  This is useful if
- * we need to approach a kernel structure obliquely, from its address
- * obtained without the usual locking.  We can lock the structure to
- * stabilize it and check it's still at the given address, only if we
- * can be sure that the memory has not been meanwhile reused for some
- * other kind of object (which our subsystem's lock might corrupt).
- *
- * rcu_read_lock before reading the address, then rcu_read_unlock after
- * taking the spinlock within the structure expected at that address.
- *
- * We assume struct slab_rcu can overlay struct slab when destroying.
- */
-struct slab_rcu {
-	struct rcu_head head;
-	struct kmem_cache *cachep;
-	void *addr;
-};
-
-/*
- * struct array_cache
- *
- * Purpose:
- * - LIFO ordering, to hand out cache-warm objects from _alloc
- * - reduce the number of linked list operations
- * - reduce spinlock operations
- *
- * The limit is stored in the per-cpu structure to reduce the data cache
- * footprint.
- *
- */
-struct array_cache {
-	unsigned int avail;
-	unsigned int limit;
-	unsigned int batchcount;
-	unsigned int touched;
-	spinlock_t lock;
-	void *entry[0];		/*
-				 * Must have this definition in here for the proper
-				 * alignment of array_cache. Also simplifies accessing
-				 * the entries.
-				 * [0] is for gcc 2.95. It should really be [].
-				 */
-};
-
-/* bootstrap: The caches do not work without cpuarrays anymore,
- * but the cpuarrays are allocated from the generic caches...
- */
-#define BOOT_CPUCACHE_ENTRIES	1
-struct arraycache_init {
-	struct array_cache cache;
-	void *entries[BOOT_CPUCACHE_ENTRIES];
-};
-
-/*
- * The slab lists for all objects.
- */
-struct kmem_list3 {
-	struct list_head slabs_partial;	/* partial list first, better asm code */
-	struct list_head slabs_full;
-	struct list_head slabs_free;
-	unsigned long free_objects;
-	unsigned int free_limit;
-	unsigned int colour_next;	/* Per-node cache coloring */
-	spinlock_t list_lock;
-	struct array_cache *shared;	/* shared per node */
-	struct array_cache **alien;	/* on other nodes */
-	unsigned long next_reap;	/* updated without locking */
-	int free_touched;		/* updated without locking */
-};
-
-/*
  * Need this for bootstrapping a per node allocator.
  */
 #define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1)
@@ -365,79 +240,6 @@ static void kmem_list3_init(struct kmem_
 	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
 	} while (0)
 
-/*
- * struct kmem_cache
- *
- * manages a cache.
- */
-
-struct kmem_cache {
-/* 1) per-cpu data, touched during every alloc/free */
-	struct array_cache *array[NR_CPUS];
-	unsigned int batchcount;
-	unsigned int limit;
-	unsigned int shared;
-	unsigned int buffer_size;
-/* 2) touched by every alloc & free from the backend */
-	struct kmem_list3 *nodelists[MAX_NUMNODES];
-	unsigned int flags;	/* constant flags */
-	unsigned int num;	/* # of objs per slab */
-	spinlock_t spinlock;
-
-/* 3) cache_grow/shrink */
-	/* order of pgs per slab (2^n) */
-	unsigned int gfporder;
-
-	/* force GFP flags, e.g. GFP_DMA */
-	gfp_t gfpflags;
-
-	size_t colour;		/* cache colouring range */
-	unsigned int colour_off;	/* colour offset */
-	struct kmem_cache *slabp_cache;
-	unsigned int slab_size;
-	unsigned int dflags;	/* dynamic flags */
-
-	/* constructor func */
-	void (*ctor) (void *, struct kmem_cache *, unsigned long);
-
-	/* de-constructor func */
-	void (*dtor) (void *, struct kmem_cache *, unsigned long);
-
-/* 4) cache creation/removal */
-	const char *name;
-	struct list_head next;
-
-/* 5) statistics */
-#if STATS
-	unsigned long num_active;
-	unsigned long num_allocations;
-	unsigned long high_mark;
-	unsigned long grown;
-	unsigned long reaped;
-	unsigned long errors;
-	unsigned long max_freeable;
-	unsigned long node_allocs;
-	unsigned long node_frees;
-	atomic_t allochit;
-	atomic_t allocmiss;
-	atomic_t freehit;
-	atomic_t freemiss;
-#endif
-#if DEBUG
-	/*
-	 * If debugging is enabled, then the allocator can add additional
-	 * fields and/or padding to every object. buffer_size contains the total
-	 * object size including these internal fields, the following two
-	 * variables contain the offset to the user object and its size.
-	 */
-	int obj_offset;
-	int obj_size;
-#endif
-};
-
-#define CFLGS_OFF_SLAB		(0x80000000UL)
-#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
-
 #define BATCHREFILL_LIMIT	16
 /* Optimization question: fewer reaps means less 
  * probability for unnessary cpucache drain/refill cycles.
@@ -448,12 +250,13 @@ struct kmem_cache {
 #define REAPTIMEOUT_CPUC	(2*HZ)
 #define REAPTIMEOUT_LIST3	(4*HZ)
 
+#define	STATS_INC_GROWN(x)	((x)->grown++)
+#define	STATS_INC_REAPED(x)	((x)->reaped++)
+#define	STATS_INC_SHRUNK(x)	((x)->shrunk++)
 #if STATS
 #define	STATS_INC_ACTIVE(x)	((x)->num_active++)
 #define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
 #define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
-#define	STATS_INC_GROWN(x)	((x)->grown++)
-#define	STATS_INC_REAPED(x)	((x)->reaped++)
 #define	STATS_SET_HIGH(x)	do { if ((x)->num_active > (x)->high_mark) \
 					(x)->high_mark = (x)->num_active; \
 				} while (0)
@@ -473,8 +276,6 @@ struct kmem_cache {
 #define	STATS_INC_ACTIVE(x)	do { } while (0)
 #define	STATS_DEC_ACTIVE(x)	do { } while (0)
 #define	STATS_INC_ALLOCED(x)	do { } while (0)
-#define	STATS_INC_GROWN(x)	do { } while (0)
-#define	STATS_INC_REAPED(x)	do { } while (0)
 #define	STATS_SET_HIGH(x)	do { } while (0)
 #define	STATS_INC_ERR(x)	do { } while (0)
 #define	STATS_INC_NODEALLOCS(x)	do { } while (0)
@@ -574,47 +375,13 @@ static void **dbg_userword(struct kmem_c
 #define	BREAK_GFP_ORDER_LO	0
 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
 
-/* Functions for storing/retrieving the cachep and or slab from the
- * global 'mem_map'. These are used to find the slab an obj belongs to.
- * With kfree(), these are used to find the cache which an obj belongs to.
- */
-static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
-{
-	page->lru.next = (struct list_head *)cache;
-}
-
-static inline struct kmem_cache *page_get_cache(struct page *page)
-{
-	return (struct kmem_cache *)page->lru.next;
-}
-
-static inline void page_set_slab(struct page *page, struct slab *slab)
-{
-	page->lru.prev = (struct list_head *)slab;
-}
-
-static inline struct slab *page_get_slab(struct page *page)
-{
-	return (struct slab *)page->lru.prev;
-}
-
-static inline struct kmem_cache *virt_to_cache(const void *obj)
-{
-	struct page *page = virt_to_page(obj);
-	return page_get_cache(page);
-}
-
-static inline struct slab *virt_to_slab(const void *obj)
-{
-	struct page *page = virt_to_page(obj);
-	return page_get_slab(page);
-}
-
 /* These are the default caches for kmalloc. Custom caches can have other sizes. */
 struct cache_sizes malloc_sizes[] = {
 #define CACHE(x) { .cs_size = (x) },
 #include <linux/kmalloc_sizes.h>
 	CACHE(ULONG_MAX)
+#include <linux/kmalloc_sizes.h>
+	CACHE(ULONG_MAX)
 #undef CACHE
 };
 EXPORT_SYMBOL(malloc_sizes);
@@ -628,10 +395,17 @@ struct cache_names {
 static struct cache_names __initdata cache_names[] = {
 #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
 #include <linux/kmalloc_sizes.h>
+	{NULL,},
+#undef CACHE
+#define CACHE(x) { .name = "size-" #x "(UBC)", .name_dma = "size-" #x "(DMA,UBC)" },
+#include <linux/kmalloc_sizes.h>
 	{NULL,}
 #undef CACHE
 };
 
+int malloc_cache_num;
+EXPORT_SYMBOL(malloc_cache_num);
+
 static struct arraycache_init initarray_cache __initdata =
     { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 static struct arraycache_init initarray_generic =
@@ -653,6 +427,7 @@ static struct kmem_cache cache_cache = {
 
 /* Guard access to the cache-chain. */
 static DEFINE_MUTEX(cache_chain_mutex);
+static DEFINE_SPINLOCK(cache_chain_lock);
 static struct list_head cache_chain;
 
 /*
@@ -698,6 +473,8 @@ static inline struct kmem_cache *__find_
 {
 	struct cache_sizes *csizep = malloc_sizes;
 
+	if (gfpflags & __GFP_UBC)
+		csizep += malloc_cache_num;
 #if DEBUG
 	/* This happens if someone tries to call
 	 * kmem_cache_create(), or __kmalloc(), before
@@ -724,9 +501,17 @@ struct kmem_cache *kmem_find_general_cac
 }
 EXPORT_SYMBOL(kmem_find_general_cachep);
 
-static size_t slab_mgmt_size(size_t nr_objs, size_t align)
+static size_t slab_mgmt_size_noalign(size_t nr_objs, int flags)
 {
-	return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
+	size_t size_noub;	/* struct slab plus one bufctl per object */
+
+	size_noub = sizeof(struct slab) + nr_objs * sizeof(kmem_bufctl_t);
+	return ALIGN(size_noub, UB_ALIGN(flags)) + nr_objs * UB_EXTRA(flags);
+}
+
+static size_t slab_mgmt_size(size_t nr_objs, size_t align, int flags)
+{
+	return ALIGN(slab_mgmt_size_noalign(nr_objs, flags), align);
 }
 
 /* Calculate the number of objects and left-over bytes for a given
@@ -770,20 +555,23 @@ static void cache_estimate(unsigned long
 		 * into account.
 		 */
 		nr_objs = (slab_size - sizeof(struct slab)) /
-			  (buffer_size + sizeof(kmem_bufctl_t));
+			  (buffer_size + sizeof(kmem_bufctl_t) +
+			   	UB_EXTRA(flags));
 
 		/*
 		 * This calculated number will be either the right
 		 * amount, or one greater than what we want.
 		 */
-		if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
-		       > slab_size)
+		if (slab_mgmt_size(nr_objs, align, flags) +
+				nr_objs * buffer_size > slab_size)
 			nr_objs--;
+		BUG_ON(slab_mgmt_size(nr_objs, align, flags) +
+				nr_objs * buffer_size > slab_size);
 
 		if (nr_objs > SLAB_LIMIT)
 			nr_objs = SLAB_LIMIT;
 
-		mgmt_size = slab_mgmt_size(nr_objs, align);
+		mgmt_size = slab_mgmt_size(nr_objs, align, flags);
 	}
 	*num = nr_objs;
 	*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
@@ -1271,9 +1059,10 @@ void __init kmem_cache_init(void)
 		    kmem_cache_create(names[INDEX_L3].name,
 				      sizes[INDEX_L3].cs_size,
 				      ARCH_KMALLOC_MINALIGN,
-				      (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL,
-				      NULL);
+				      (ARCH_KMALLOC_FLAGS |
+				       SLAB_PANIC), NULL, NULL);
 
+	for (i = 0; i < 2; i++) {
 	while (sizes->cs_size != ULONG_MAX) {
 		/*
 		 * For performance, all the general caches are L1 aligned.
@@ -1284,29 +1073,29 @@ void __init kmem_cache_init(void)
 		 */
 		if (!sizes->cs_cachep)
 			sizes->cs_cachep = kmem_cache_create(names->name,
-							     sizes->cs_size,
-							     ARCH_KMALLOC_MINALIGN,
-							     (ARCH_KMALLOC_FLAGS
-							      | SLAB_PANIC),
-							     NULL, NULL);
+					sizes->cs_size, ARCH_KMALLOC_MINALIGN,
+					(ARCH_KMALLOC_FLAGS
+					 | (i ? SLAB_UBC : 0) | SLAB_NO_CHARGE
+					 | SLAB_PANIC), NULL, NULL);
 
 		/* Inc off-slab bufctl limit until the ceiling is hit. */
-		if (!(OFF_SLAB(sizes->cs_cachep))) {
-			offslab_limit = sizes->cs_size - sizeof(struct slab);
-			offslab_limit /= sizeof(kmem_bufctl_t);
-		}
+		if (!(OFF_SLAB(sizes->cs_cachep)))
+			offslab_limit = sizes->cs_size;
 
 		sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
-							sizes->cs_size,
-							ARCH_KMALLOC_MINALIGN,
-							(ARCH_KMALLOC_FLAGS |
-							 SLAB_CACHE_DMA |
-							 SLAB_PANIC), NULL,
-							NULL);
+				sizes->cs_size, ARCH_KMALLOC_MINALIGN,
+				(ARCH_KMALLOC_FLAGS
+				 | (i ? SLAB_UBC : 0) | SLAB_NO_CHARGE
+				 | SLAB_CACHE_DMA | SLAB_PANIC), NULL, NULL);
 
 		sizes++;
 		names++;
 	}
+	sizes++;
+	names++;
+	if (!i)
+		malloc_cache_num = sizes - malloc_sizes;
+	}
 	/* 4) Replace the bootstrap head arrays */
 	{
 		void *ptr;
@@ -1714,8 +1503,13 @@ static inline size_t calculate_slab_orde
 			continue;
 
 		/* More than offslab_limit objects will cause problems */
-		if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit)
-			break;
+		if (flags & CFLGS_OFF_SLAB) {
+			unsigned long slab_size;
+
+			slab_size = slab_mgmt_size_noalign(num, flags);
+			if (slab_size > offslab_limit)
+				break;
+		}
 
 		/* Found something acceptable - save it away */
 		cachep->num = num;
@@ -1960,8 +1754,7 @@ kmem_cache_create (const char *name, siz
 		cachep = NULL;
 		goto oops;
 	}
-	slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
-			  + sizeof(struct slab), align);
+	slab_size = slab_mgmt_size(cachep->num, align, flags);
 
 	/*
 	 * If the slab has been placed off-slab, and we have enough space then
@@ -1974,8 +1767,7 @@ kmem_cache_create (const char *name, siz
 
 	if (flags & CFLGS_OFF_SLAB) {
 		/* really off slab. No need for manual alignment */
-		slab_size =
-		    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
+		slab_size = slab_mgmt_size_noalign(cachep->num, flags);
 	}
 
 	cachep->colour_off = cache_line_size();
@@ -2054,7 +1846,10 @@ kmem_cache_create (const char *name, siz
 	}
 
 	/* cache setup completed, link it into the list */
+	set_cache_objuse(cachep);
+	spin_lock_irq(&cache_chain_lock);
 	list_add(&cachep->next, &cache_chain);
+	spin_unlock_irq(&cache_chain_lock);
       oops:
 	if (!cachep && (flags & SLAB_PANIC))
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
@@ -2172,6 +1967,7 @@ static int __node_shrink(struct kmem_cac
 			BUG();
 #endif
 		list_del(&slabp->list);
+		STATS_INC_SHRUNK(cachep);
 
 		l3->free_objects -= cachep->num;
 		spin_unlock_irq(&l3->list_lock);
@@ -2250,13 +2046,17 @@ int kmem_cache_destroy(struct kmem_cache
 	/*
 	 * the chain is never empty, cache_cache is never destroyed
 	 */
+	spin_lock_irq(&cache_chain_lock);
 	list_del(&cachep->next);
+	spin_unlock_irq(&cache_chain_lock);
 	mutex_unlock(&cache_chain_mutex);
 
 	if (__cache_shrink(cachep)) {
 		slab_error(cachep, "Can't free all objects");
 		mutex_lock(&cache_chain_mutex);
+		spin_lock_irq(&cache_chain_lock);
 		list_add(&cachep->next, &cache_chain);
+		spin_unlock_irq(&cache_chain_lock);
 		mutex_unlock(&cache_chain_mutex);
 		unlock_cpu_hotplug();
 		return 1;
@@ -2276,6 +2076,8 @@ int kmem_cache_destroy(struct kmem_cache
 			kfree(l3);
 		}
 	}
+
+	ub_kmemcache_free(cachep);
 	kmem_cache_free(&cache_cache, cachep);
 
 	unlock_cpu_hotplug();
@@ -2292,7 +2094,8 @@ static struct slab *alloc_slabmgmt(struc
 
 	if (OFF_SLAB(cachep)) {
 		/* Slab management obj is off-slab. */
-		slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+		slabp = kmem_cache_alloc(cachep->slabp_cache,
+				local_flags & (~__GFP_UBC));
 		if (!slabp)
 			return NULL;
 	} else {
@@ -2302,15 +2105,11 @@ static struct slab *alloc_slabmgmt(struc
 	slabp->inuse = 0;
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp + colour_off;
+	init_slab_ubps(cachep, slabp);
 
 	return slabp;
 }
 
-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
-{
-	return (kmem_bufctl_t *) (slabp + 1);
-}
-
 static void cache_init_objs(struct kmem_cache *cachep,
 			    struct slab *slabp, unsigned long ctor_flags)
 {
@@ -2480,7 +2279,7 @@ static int cache_grow(struct kmem_cache 
 	/* Get mem for the objs.
 	 * Attempt to allocate a physical page from 'nodeid',
 	 */
-	if (!(objp = kmem_getpages(cachep, flags, nodeid)))
+	if (!(objp = kmem_getpages(cachep, flags & (~__GFP_UBC), nodeid)))
 		goto failed;
 
 	/* Get slab management. */
@@ -2827,10 +2626,15 @@ __cache_alloc(struct kmem_cache *cachep,
 
 	if (!objp)
 		objp = ____cache_alloc(cachep, flags);
-	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
 					    caller);
 	prefetchw(objp);
+
+	if (objp && ub_slab_charge(cachep, objp, flags)) {
+		kmem_cache_free(cachep, objp);
+		objp = NULL;
+	}
+	local_irq_restore(save_flags);
 	return objp;
 }
 
@@ -2946,6 +2750,7 @@ static void free_block(struct kmem_cache
 		/* fixup slab chains */
 		if (slabp->inuse == 0) {
 			if (l3->free_objects > l3->free_limit) {
+				STATS_INC_SHRUNK(cachep);
 				l3->free_objects -= cachep->num;
 				slab_destroy(cachep, slabp);
 			} else {
@@ -2961,6 +2766,34 @@ static void free_block(struct kmem_cache
 	}
 }
 
+/*
+ * kmem_cache_free_block - hand a batch of objects back to a node's slabs
+ * @cachep: cache passed through to free_block()
+ * @objpp: array of object pointers passed through to free_block()
+ * @nr_objects: number of entries in @objpp; 0 makes the call a no-op
+ * @node: node whose kmem_list3 receives the objects
+ *
+ * free_block() updates the node's kmem_list3 (l3->free_objects, slab
+ * destruction), state that is otherwise changed under l3->list_lock, so
+ * that lock is taken here nested inside cachep->spinlock.
+ */
+void kmem_cache_free_block(kmem_cache_t *cachep, void **objpp,
+		int nr_objects, int node)
+{
+	struct kmem_list3 *l3;
+	unsigned long flags;
+
+	if (!nr_objects)
+		return;
+
+	l3 = cachep->nodelists[node];
+	spin_lock_irqsave(&cachep->spinlock, flags);
+	spin_lock(&l3->list_lock);
+	free_block(cachep, objpp, nr_objects, node);
+	spin_unlock(&l3->list_lock);
+	spin_unlock_irqrestore(&cachep->spinlock, flags);
+}
+
 static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
 {
 	int batchcount;
@@ -3027,6 +2844,8 @@ static inline void __cache_free(struct k
 	check_irq_off();
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
+	ub_slab_uncharge(cachep, objp);
+
 	/* Make sure we are not freeing a object from another
 	 * node to the array cache on this cpu.
 	 */
@@ -3153,11 +2972,15 @@ void *kmem_cache_alloc_node(struct kmem_
 		ptr = ____cache_alloc(cachep, flags);
 	else
 		ptr = __cache_alloc_node(cachep, flags, nodeid);
-	local_irq_restore(save_flags);
 
 	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr,
 					   __builtin_return_address(0));
 
+	if (ptr && ub_slab_charge(cachep, ptr, flags)) {
+		kmem_cache_free(cachep, ptr);
+		ptr = NULL;
+	}
+	local_irq_restore(save_flags);
 	return ptr;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
@@ -3237,10 +3060,10 @@ EXPORT_SYMBOL(__kmalloc_track_caller);
  *
  * @size: how many bytes of memory are required.
  */
-void *__alloc_percpu(size_t size)
+void *__alloc_percpu_mask(size_t size, gfp_t gfp)
 {
 	int i;
-	struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+	struct percpu_data *pdata = kmalloc(sizeof(*pdata), gfp);
 
 	if (!pdata)
 		return NULL;
@@ -3254,9 +3077,9 @@ void *__alloc_percpu(size_t size)
 		int node = cpu_to_node(i);
 
 		if (node_online(node))
-			pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node);
+			pdata->ptrs[i] = kmalloc_node(size, gfp, node);
 		else
-			pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
+			pdata->ptrs[i] = kmalloc(size, gfp);
 
 		if (!pdata->ptrs[i])
 			goto unwind_oom;
@@ -3275,7 +3098,7 @@ void *__alloc_percpu(size_t size)
 	kfree(pdata);
 	return NULL;
 }
-EXPORT_SYMBOL(__alloc_percpu);
+EXPORT_SYMBOL(__alloc_percpu_mask);
 #endif
 
 /**
@@ -3581,6 +3404,7 @@ static void cache_reap(void *unused)
 {
 	struct list_head *walk;
 	struct kmem_list3 *l3;
+	int node;
 
 	if (!mutex_trylock(&cache_chain_mutex)) {
 		/* Give up. Setup the next iteration. */
@@ -3589,6 +3413,8 @@ static void cache_reap(void *unused)
 		return;
 	}
 
+	{KSTAT_PERF_ENTER(cache_reap)
+	preempt_disable();
 	list_for_each(walk, &cache_chain) {
 		struct kmem_cache *searchp;
 		struct list_head *p;
@@ -3603,6 +3429,12 @@ static void cache_reap(void *unused)
 		check_irq_on();
 
 		/*
+		 * In the original kernel this thread is bound to a CPU,
+		 * but in VZ it can migrate from one physical
+		 * CPU to another due to cond_resched() below.
+		 */
+		node = numa_node_id();
+		/*
 		 * We only take the l3 lock if absolutely necessary and we
 		 * have established with reasonable certainty that
 		 * we can do some work if the lock was obtained.
@@ -3661,13 +3493,86 @@ static void cache_reap(void *unused)
 			slab_destroy(searchp, slabp);
 		} while (--tofree > 0);
 	      next:
+		preempt_enable_no_resched();
 		cond_resched();
+		preempt_disable();
 	}
 	check_irq_on();
 	mutex_unlock(&cache_chain_mutex);
 	next_reap_node();
+	KSTAT_PERF_LEAVE(cache_reap)}
 	/* Setup the next iteration */
 	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
+	preempt_enable();
+}
+
+#define SHOW_TOP_SLABS	7
+
+/*
+ * Bytes held by the slabs of @cachep on all online nodes, including off-slab
+ * management objects. Call with IRQs off: the locks are taken without _irq.
+ */
+static unsigned long get_cache_size(kmem_cache_t *cachep)
+{
+	unsigned long slabs = 0;
+	struct list_head *lh;
+	struct kmem_list3 *l3;
+	int node;
+
+	spin_lock(&cachep->spinlock);
+	for_each_online_node(node) {
+		l3 = cachep->nodelists[node];
+		if (!l3)
+			continue;
+		spin_lock(&l3->list_lock);
+		list_for_each(lh, &l3->slabs_full)
+			slabs++;
+		list_for_each(lh, &l3->slabs_partial)
+			slabs++;
+		list_for_each(lh, &l3->slabs_free)
+			slabs++;
+		spin_unlock(&l3->list_lock);	/* not _irq: caller has IRQs off */
+	}
+	spin_unlock(&cachep->spinlock);
+	return slabs * (PAGE_SIZE << cachep->gfporder) + (OFF_SLAB(cachep) ?
+			cachep->slabp_cache->buffer_size * slabs : 0);
+}
+
+/* Print the SHOW_TOP_SLABS caches for which get_cache_size() is largest. */
+void show_slab_info(void)
+{
+	kmem_cache_t *top[SHOW_TOP_SLABS], *cachep;
+	unsigned long sizes[SHOW_TOP_SLABS], bytes, flags;
+	int slot, low;
+
+	memset(sizes, 0, sizeof(sizes));
+	memset(top, 0, sizeof(top));
+
+	printk("Top %d caches:\n", SHOW_TOP_SLABS);
+	spin_lock_irqsave(&cache_chain_lock, flags);
+	list_for_each_entry(cachep, &cache_chain, next) {
+		bytes = get_cache_size(cachep);
+		/* locate the smallest size recorded so far ... */
+		low = 0;
+		for (slot = 1; slot < SHOW_TOP_SLABS; slot++)
+			if (sizes[slot] < sizes[low])
+				low = slot;
+
+		/* ... and replace it when this cache is strictly larger */
+		if (bytes <= sizes[low])
+			continue;
+		sizes[low] = bytes;
+		top[low] = cachep;
+	}
+
+	for (slot = 0; slot < SHOW_TOP_SLABS; slot++) {
+		if (!top[slot])
+			continue;	/* slot never filled */
+		printk(" %-21s: size %10lu buffer_size %10u\n",
+				top[slot]->name, sizes[slot],
+				top[slot]->buffer_size);
+	}
+	spin_unlock_irqrestore(&cache_chain_lock, flags);
+}
 
 #ifdef CONFIG_PROC_FS
@@ -3689,7 +3594,7 @@ static void print_slabinfo_header(struct
 	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
 #if STATS
 	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
-		 "<error> <maxfreeable> <nodeallocs> <remotefrees>");
+		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <shrunk>");
 	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
 #endif
 	seq_putc(m, '\n');
@@ -3787,7 +3692,7 @@ static int s_show(struct seq_file *m, vo
 	if (error)
 		printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
 
-	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
+	seq_printf(m, "%-21s %6lu %6lu %6u %4u %4d",
 		   name, active_objs, num_objs, cachep->buffer_size,
 		   cachep->num, (1 << cachep->gfporder));
 	seq_printf(m, " : tunables %4u %4u %4u",
@@ -3800,13 +3705,16 @@ static int s_show(struct seq_file *m, vo
 		unsigned long allocs = cachep->num_allocations;
 		unsigned long grown = cachep->grown;
 		unsigned long reaped = cachep->reaped;
+		unsigned long shrunk = cachep->shrunk;
 		unsigned long errors = cachep->errors;
 		unsigned long max_freeable = cachep->max_freeable;
 		unsigned long node_allocs = cachep->node_allocs;
 		unsigned long node_frees = cachep->node_frees;
 
 		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
-				%4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees);
+				%4lu %4lu %4lu %4lu %5lu",
+				allocs, high, grown, reaped, errors,
+				max_freeable, node_allocs, node_frees, shrunk);
 	}
 	/* cpu stats */
 	{
diff -upr linux-2.6.16.46-0.12.orig/mm/slob.c linux-2.6.16.46-0.12-027test011/mm/slob.c
--- linux-2.6.16.46-0.12.orig/mm/slob.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/mm/slob.c	2007-08-28 17:35:30.000000000 +0400
@@ -336,10 +336,10 @@ EXPORT_SYMBOL(slab_reclaim_pages);
 
 #ifdef CONFIG_SMP
 
-void *__alloc_percpu(size_t size)
+void *__alloc_percpu_mask(size_t size, gfp_t gfp)
 {
 	int i;
-	struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL);
+	struct percpu_data *pdata = kmalloc(sizeof (*pdata), gfp);
 
 	if (!pdata)
 		return NULL;
@@ -347,7 +347,7 @@ void *__alloc_percpu(size_t size)
 	for (i = 0; i < NR_CPUS; i++) {
 		if (!cpu_possible(i))
 			continue;
-		pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
+		pdata->ptrs[i] = kmalloc(size, gfp);
 		if (!pdata->ptrs[i])
 			goto unwind_oom;
 		memset(pdata->ptrs[i], 0, size);
@@ -365,7 +365,7 @@ unwind_oom:
 	kfree(pdata);
 	return NULL;
 }
-EXPORT_SYMBOL(__alloc_percpu);
+EXPORT_SYMBOL(__alloc_percpu_mask);
 
 void
 free_percpu(const void *objp)
diff -upr linux-2.6.16.46-0.12.orig/mm/swap.c linux-2.6.16.46-0.12-027test011/mm/swap.c
--- linux-2.6.16.46-0.12.orig/mm/swap.c	2007-08-24 19:28:23.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/swap.c	2007-08-28 17:35:33.000000000 +0400
@@ -183,6 +183,8 @@ void fastcall lru_cache_add_active(struc
 	put_cpu_var(lru_add_active_pvecs);
 }
 
+EXPORT_SYMBOL(lru_cache_add_active);
+
 static void __lru_add_drain(int cpu)
 {
 	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
@@ -201,6 +203,8 @@ void lru_add_drain(void)
 	put_cpu();
 }
 
+EXPORT_SYMBOL(lru_add_drain);
+
 #ifdef CONFIG_NUMA
 static void lru_add_drain_per_cpu(void *dummy)
 {
diff -upr linux-2.6.16.46-0.12.orig/mm/swap_state.c linux-2.6.16.46-0.12-027test011/mm/swap_state.c
--- linux-2.6.16.46-0.12.orig/mm/swap_state.c	2007-08-24 19:28:23.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/swap_state.c	2007-08-28 17:35:33.000000000 +0400
@@ -18,6 +18,9 @@
 
 #include <asm/pgtable.h>
 
+#include <ub/ub_vmpages.h>
+#include <ub/io_acct.h>
+
 /*
  * swapper_space is a fiction, retained to simplify the path through
  * vmscan's shrink_list, to make sync_page look nicer, and to allow
@@ -42,6 +45,7 @@ struct address_space swapper_space = {
 	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
 	.backing_dev_info = &swap_backing_dev_info,
 };
+EXPORT_SYMBOL(swapper_space);
 
 #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
 
@@ -53,6 +57,7 @@ static struct {
 	unsigned long noent_race;
 	unsigned long exist_race;
 } swap_cache_info;
+EXPORT_SYMBOL(swap_cache_info);
 
 void show_swap_cache_info(void)
 {
@@ -68,8 +73,7 @@ void show_swap_cache_info(void)
  * __add_to_swap_cache resembles add_to_page_cache on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
-			       gfp_t gfp_mask)
+int __add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 {
 	int error;
 
@@ -94,7 +98,9 @@ static int __add_to_swap_cache(struct pa
 	return error;
 }
 
-static int add_to_swap_cache(struct page *page, swp_entry_t entry)
+EXPORT_SYMBOL(__add_to_swap_cache);
+
+int add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
 
@@ -116,6 +122,8 @@ static int add_to_swap_cache(struct page
 	return 0;
 }
 
+EXPORT_SYMBOL(add_to_swap_cache);
+
 /*
  * This must be called only on pages that have
  * been verified to be in the swap cache.
@@ -137,6 +145,7 @@ void inline __delete_from_swap_cache_noc
 
 void __delete_from_swap_cache(struct page *page)
 {
+#ifdef CONFIG_PAGE_STATES
 	/*
 	 * Check if the discard fault handler already removed
 	 * the page from the page cache. If not set the discard
@@ -145,9 +154,9 @@ void __delete_from_swap_cache(struct pag
 	 */
 	if (page_host_discards() && TestSetPageDiscarded(page))
 		return;
-
+#endif
 	__delete_from_swap_cache_nocheck(page);
-
+#ifdef CONFIG_PAGE_STATES
 	/*
 	 * Check the hardware page state and clear the discard
 	 * bit in the page flags only if the page is not
@@ -155,6 +164,7 @@ void __delete_from_swap_cache(struct pag
 	 */
 	if (page_host_discards() && !page_discarded(page))
 		ClearPageDiscarded(page);
+#endif
 }
 
 /**
@@ -173,7 +183,14 @@ int add_to_swap(struct page * page, gfp_
 		BUG();
 
 	for (;;) {
-		entry = get_swap_page();
+		struct user_beancounter *ub;
+
+		ub = pb_grab_page_ub(page);
+		if (IS_ERR(ub))
+			return 0;
+
+		entry = get_swap_page(ub);
+		put_beancounter(ub);
 		if (!entry.val)
 			return 0;
 
@@ -259,6 +276,7 @@ int move_from_swap_cache(struct page *pa
 		delete_from_swap_cache(page);
 		/* shift page from clean_pages to dirty_pages list */
 		ClearPageDirty(page);
+		ub_io_release_debug(page);
 		set_page_dirty(page);
 	}
 	return err;
@@ -386,3 +404,5 @@ struct page *read_swap_cache_async(swp_e
 		page_cache_release(new_page);
 	return found_page;
 }
+
+EXPORT_SYMBOL(read_swap_cache_async);
diff -upr linux-2.6.16.46-0.12.orig/mm/swapfile.c linux-2.6.16.46-0.12-027test011/mm/swapfile.c
--- linux-2.6.16.46-0.12.orig/mm/swapfile.c	2007-08-24 19:28:23.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/swapfile.c	2007-08-28 17:35:33.000000000 +0400
@@ -37,6 +37,8 @@
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
 
+#include <ub/ub_vmpages.h>
+
 DEFINE_SPINLOCK(swap_lock);
 unsigned int nr_swapfiles;
 long total_swap_pages;
@@ -48,9 +50,13 @@ static const char Bad_offset[] = "Bad sw
 static const char Unused_offset[] = "Unused swap offset entry ";
 
 struct swap_list_t swap_list = {-1, -1};
-
 struct swap_info_struct swap_info[MAX_SWAPFILES];
 
+EXPORT_SYMBOL(total_swap_pages);
+EXPORT_SYMBOL(swap_lock);
+EXPORT_SYMBOL(swap_list);
+EXPORT_SYMBOL(swap_info);
+
 static DEFINE_MUTEX(swapon_mutex);
 
 /*
@@ -176,7 +182,7 @@ no_page:
 	return 0;
 }
 
-swp_entry_t get_swap_page(void)
+swp_entry_t get_swap_page(struct user_beancounter *ub)
 {
 	struct swap_info_struct *si;
 	pgoff_t offset;
@@ -197,6 +203,8 @@ swp_entry_t get_swap_page(void)
 			wrapped++;
 		}
 
+		if (si->flags & SWP_READONLY)
+			continue;
 		if (!si->highest_bit)
 			continue;
 		if (!(si->flags & SWP_WRITEOK))
@@ -206,6 +214,7 @@ swp_entry_t get_swap_page(void)
 		offset = scan_swap_map(si);
 		if (offset) {
 			spin_unlock(&swap_lock);
+			ub_swapentry_inc(si, offset, ub);
 			return swp_entry(type, offset);
 		}
 		next = swap_list.next;
@@ -217,6 +226,8 @@ noswap:
 	return (swp_entry_t) {0};
 }
 
+EXPORT_SYMBOL(get_swap_page);
+
 swp_entry_t get_swap_page_of_type(int type)
 {
 	struct swap_info_struct *si;
@@ -224,7 +235,7 @@ swp_entry_t get_swap_page_of_type(int ty
 
 	spin_lock(&swap_lock);
 	si = swap_info + type;
-	if (si->flags & SWP_WRITEOK) {
+	if (si->flags & SWP_WRITEOK && !(si->flags & SWP_READONLY)) {
 		nr_swap_pages--;
 		offset = scan_swap_map(si);
 		if (offset) {
@@ -281,6 +292,7 @@ static int swap_entry_free(struct swap_i
 		count--;
 		p->swap_map[offset] = count;
 		if (!count) {
+			ub_swapentry_dec(p, offset);
 			if (offset < p->lowest_bit)
 				p->lowest_bit = offset;
 			if (offset > p->highest_bit)
@@ -309,6 +321,8 @@ void swap_free(swp_entry_t entry)
 	}
 }
 
+EXPORT_SYMBOL(swap_free);
+
 /*
  * How many references to page are currently swapped out?
  */
@@ -392,6 +406,55 @@ int remove_exclusive_swap_page(struct pa
 	return retval;
 }
 
+/* Drop a locked page from the swap cache (releasing the cache's swap entry
+ * and page references) when the cache is the entry's only user, but only
+ * if swap is getting full or the swap area is not plainly active (e.g. it
+ * is read-only).  Returns 1 if the page was removed, 0 otherwise. */
+int try_to_remove_exclusive_swap_page(struct page *page)
+{
+	struct swap_info_struct *si;
+	swp_entry_t entry;
+	int removed = 0;
+
+	BUG_ON(PagePrivate(page));
+	BUG_ON(!PageLocked(page));
+
+	if (!PageSwapCache(page) || PageWriteback(page))
+		return 0;
+	if (page_count(page) != 2) /* 2: us + cache */
+		return 0;
+
+	entry.val = page->private;
+	si = swap_info_get(entry);	/* on success swap_lock is held */
+	if (!si)
+		return 0;
+
+	if (!vm_swap_full() &&
+			(si->flags & (SWP_ACTIVE|SWP_READONLY)) == SWP_ACTIVE)
+		goto out_unlock;	/* room left, area writable: keep it */
+
+	/* Is the only swap cache user the cache itself? */
+	if (si->swap_map[swp_offset(entry)] == 1) {
+		/* Recheck the page count with the swapcache lock held. */
+		write_lock_irq(&swapper_space.tree_lock);
+		if ((page_count(page) == 2) && !PageWriteback(page)) {
+			__delete_from_swap_cache(page);
+			SetPageDirty(page);
+			removed = 1;
+		}
+		write_unlock_irq(&swapper_space.tree_lock);
+	}
+out_unlock:
+	spin_unlock(&swap_lock);
+
+	if (removed) {
+		swap_free(entry);
+		page_cache_release(page);
+	}
+
+	return removed;
+}
+
 /*
  * Free the swap entry like above, but also try to
  * free the page cache entry if it is the last user.
@@ -439,17 +502,25 @@ void free_swap_and_cache(swp_entry_t ent
 	}
 }
 
+EXPORT_SYMBOL(free_swap_and_cache);
+
 /*
  * No need to decide whether this PTE shares the swap entry with others,
  * just let do_wp_page work it out if a write is requested later - to
  * force COW, vm_page_prot omits write permission from any private vma.
  */
 static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
-		unsigned long addr, swp_entry_t entry, struct page *page)
+		unsigned long addr, swp_entry_t entry, struct page *page,
+		struct page_beancounter **pb)
 {
-	inc_mm_counter(vma->vm_mm, anon_rss);
+	struct mm_struct *mm;
+
+	mm = vma->vm_mm;
+	inc_mm_counter(mm, anon_rss);
+	ub_unused_privvm_dec(mm, vma);
+	pb_add_ref(page, mm, pb);
 	get_page(page);
-	set_pte_at(vma->vm_mm, addr, pte,
+	set_pte_at(mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, addr);
 	swap_free(entry);
@@ -462,7 +533,8 @@ static void unuse_pte(struct vm_area_str
 
 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pte_t swp_pte = swp_entry_to_pte(entry);
 	pte_t *pte;
@@ -476,7 +548,7 @@ static int unuse_pte_range(struct vm_are
 		 * Test inline before going to call unuse_pte.
 		 */
 		if (unlikely(pte_same(*pte, swp_pte))) {
-			unuse_pte(vma, pte++, addr, entry, page);
+			unuse_pte(vma, pte++, addr, entry, page, pb);
 			found = 1;
 			break;
 		}
@@ -487,7 +559,8 @@ static int unuse_pte_range(struct vm_are
 
 static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -497,7 +570,7 @@ static inline int unuse_pmd_range(struct
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		if (unuse_pte_range(vma, pmd, addr, next, entry, page))
+		if (unuse_pte_range(vma, pmd, addr, next, entry, page, pb))
 			return 1;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
@@ -505,7 +578,8 @@ static inline int unuse_pmd_range(struct
 
 static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -515,14 +589,15 @@ static inline int unuse_pud_range(struct
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		if (unuse_pmd_range(vma, pud, addr, next, entry, page))
+		if (unuse_pmd_range(vma, pud, addr, next, entry, page, pb))
 			return 1;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
 static int unuse_vma(struct vm_area_struct *vma,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	pgd_t *pgd;
 	unsigned long addr, end, next;
@@ -543,14 +618,15 @@ static int unuse_vma(struct vm_area_stru
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		if (unuse_pud_range(vma, pgd, addr, next, entry, page))
+		if (unuse_pud_range(vma, pgd, addr, next, entry, page, pb))
 			return 1;
 	} while (pgd++, addr = next, addr != end);
 	return 0;
 }
 
 static int unuse_mm(struct mm_struct *mm,
-				swp_entry_t entry, struct page *page)
+				swp_entry_t entry, struct page *page,
+				struct page_beancounter **pb)
 {
 	struct vm_area_struct *vma;
 
@@ -565,7 +641,7 @@ static int unuse_mm(struct mm_struct *mm
 		lock_page(page);
 	}
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (vma->anon_vma && unuse_vma(vma, entry, page))
+		if (vma->anon_vma && unuse_vma(vma, entry, page, pb))
 			break;
 	}
 	up_read(&mm->mmap_sem);
@@ -577,11 +653,12 @@ static int unuse_mm(struct mm_struct *mm
 }
 
 #ifdef CONFIG_MIGRATION
-int remove_vma_swap(struct vm_area_struct *vma, struct page *page)
+int remove_vma_swap(struct vm_area_struct *vma, struct page *page,
+		struct page_beancounter **pb)
 {
 	swp_entry_t entry = { .val = page_private(page) };
 
-	return unuse_vma(vma, entry, page);
+	return unuse_vma(vma, entry, page, pb);
 }
 #endif
 
@@ -640,6 +717,7 @@ static int try_to_unuse(unsigned int typ
 	int retval = 0;
 	int reset_overflow = 0;
 	int shmem;
+	struct page_beancounter *pb;
 
 	/*
 	 * When searching mms for an entry, a good strategy is to
@@ -692,6 +770,13 @@ again:
 			break;
 		}
 
+		pb = NULL;
+		if (pb_alloc_all(&pb)) {
+			page_cache_release(page);
+			retval = -ENOMEM;
+			break;
+		}
+
 		/*
 		 * Don't hold on to start_mm if it looks like exiting.
 		 */
@@ -720,6 +805,20 @@ again:
 		}
 		wait_on_page_writeback(page);
 
+		/* If the read failed we cannot map a not-uptodate page
+		 * into user space. We are in serious trouble here: we do
+		 * not even know which process to kill. The only option
+		 * left is to abort swapoff() and let someone kill the
+		 * affected processes to zap the invalid pages.
+		 */
+		if (unlikely(!PageUptodate(page))) {
+			pb_free_list(&pb);
+			unlock_page(page);
+			page_cache_release(page);
+			retval = -EIO;
+			break;
+		}
+
 		/*
 		 * Remove all references to entry.
 		 * Whenever we reach init_mm, there's no address space
@@ -731,7 +830,7 @@ again:
 			if (start_mm == &init_mm)
 				shmem = shmem_unuse(entry, page);
 			else
-				retval = unuse_mm(start_mm, entry, page);
+				retval = unuse_mm(start_mm, entry, page, &pb);
 		}
 		if (*swap_map > 1) {
 			int set_start_mm = (*swap_map >= swcount);
@@ -763,7 +862,7 @@ again:
 					set_start_mm = 1;
 					shmem = shmem_unuse(entry, page);
 				} else
-					retval = unuse_mm(mm, entry, page);
+					retval = unuse_mm(mm, entry, page, &pb);
 				if (set_start_mm && *swap_map < swcount) {
 					mmput(new_start_mm);
 					atomic_inc(&mm->mm_users);
@@ -777,6 +876,8 @@ again:
 			mmput(start_mm);
 			start_mm = new_start_mm;
 		}
+
+		pb_free_list(&pb);
 		if (retval) {
 			unlock_page(page);
 			page_cache_release(page);
@@ -1122,6 +1223,10 @@ asmlinkage long sys_swapoff(const char _
 	int i, type, prev;
 	int err;
 	
+	/* The VE admin check is just to be on the safe side: the admin can
+	 * affect swaps only if he has access to the special file, i.e. if he
+	 * has been granted access to the block device or if the swap file is
+	 * in the area visible to him. */
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
@@ -1221,6 +1326,7 @@ asmlinkage long sys_swapoff(const char _
 	spin_unlock(&swap_lock);
 	mutex_unlock(&swapon_mutex);
 	vfree(swap_map);
+	ub_swap_fini(p);
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
@@ -1240,6 +1346,8 @@ out:
 	return err;
 }
 
+EXPORT_SYMBOL(sys_swapoff);
+
 #ifdef CONFIG_PROC_FS
 /* iterator */
 static void *swap_start(struct seq_file *swap, loff_t *pos)
@@ -1579,9 +1687,16 @@ asmlinkage long sys_swapon(const char __
 		goto bad_swap;
 	}
 
+	if (ub_swap_init(p, maxpages)) {
+		error = -ENOMEM;
+		goto bad_swap;
+	}
+
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
 	p->flags = SWP_ACTIVE;
+	if (swap_flags & SWAP_FLAG_READONLY)
+		p->flags |= SWP_READONLY;
 	nr_swap_pages += nr_good_pages;
 	total_swap_pages += nr_good_pages;
 
@@ -1641,6 +1756,8 @@ out:
 	return error;
 }
 
+EXPORT_SYMBOL(sys_swapon);
+
 void si_swapinfo(struct sysinfo *val)
 {
 	unsigned int i;
@@ -1715,6 +1832,8 @@ bad_file:
 	goto out;
 }
 
+EXPORT_SYMBOL(swap_duplicate);
+
 struct swap_info_struct *
 get_swap_info_struct(unsigned type)
 {
diff -upr linux-2.6.16.46-0.12.orig/mm/truncate.c linux-2.6.16.46-0.12-027test011/mm/truncate.c
--- linux-2.6.16.46-0.12.orig/mm/truncate.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/truncate.c	2007-08-28 17:35:36.000000000 +0400
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/buffer_head.h>	/* grr. try_to_release_page,
 				   do_invalidatepage */
 
@@ -42,7 +43,8 @@ truncate_complete_page(struct address_sp
 	if (PagePrivate(page))
 		do_invalidatepage(page, 0);
 
-	clear_page_dirty(page);
+	if (test_clear_page_dirty(page))
+		task_io_account_cancelled_write(PAGE_CACHE_SIZE);
 	ClearPageUptodate(page);
 	ClearPageMappedToDisk(page);
 	remove_from_page_cache(page);
@@ -316,6 +318,15 @@ failed:
 	return 0;
 }
 
+/* Run ->launder_page() on a dirty page still in @mapping; 0 if n/a. */
+static int do_launder_page(struct address_space *mapping, struct page *page)
+{
+	if (PageDirty(page) && page->mapping == mapping &&
+			mapping->a_ops->launder_page != NULL)
+		return mapping->a_ops->launder_page(page);
+	return 0;
+}
+
 /**
  * invalidate_inode_pages2_range - remove range of pages from an address_space
  * @mapping: the address_space
@@ -386,7 +397,8 @@ int invalidate_inode_pages2_range(struct
 				}
 			}
 
-			if (!invalidate_complete_page2(mapping, page))
+			ret = do_launder_page(mapping, page);
+			if (ret == 0 && !invalidate_complete_page2(mapping, page))
 				ret = -EIO;
 			unlock_page(page);
 		}
diff -upr linux-2.6.16.46-0.12.orig/mm/vmalloc.c linux-2.6.16.46-0.12-027test011/mm/vmalloc.c
--- linux-2.6.16.46-0.12.orig/mm/vmalloc.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/mm/vmalloc.c	2007-08-28 17:35:31.000000000 +0400
@@ -20,6 +20,9 @@
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 
+#include <ub/ub_mem.h>
+#include <ub/ub_debug.h>
+
 
 DEFINE_RWLOCK(vmlist_lock);
 struct vm_struct *vmlist;
@@ -256,6 +259,70 @@ struct vm_struct *get_vm_area_node(unsig
 	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node);
 }
 
+struct vm_struct * get_vm_area_best(unsigned long size, unsigned long flags)
+{
+	unsigned long addr, best_addr, delta, best_delta;
+	struct vm_struct **p, **best_p, *tmp, *area;
+
+	area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		return NULL;
+	/* Best fit: find the free gap in vmlist that leaves the least slack. */
+	size += PAGE_SIZE; /* one-page gap at the end */
+	addr = VMALLOC_START;	/* start of the free gap being examined */
+	best_addr = 0UL;	/* stays 0 while no fitting gap is known */
+	best_p = NULL;
+	best_delta = PAGE_ALIGN(VMALLOC_END) - VMALLOC_START;
+	/* NOTE(review): the walk assumes vmlist is sorted by address. */
+	write_lock(&vmlist_lock);
+	for (p = &vmlist; (tmp = *p) &&
+			(tmp->addr <= (void *)PAGE_ALIGN(VMALLOC_END));
+			p = &tmp->next) {
+		if ((unsigned long)tmp->addr < addr)
+			continue;
+		if ((size + addr) < addr)	/* address wrap-around */
+			break;
+		delta = (unsigned long) tmp->addr - (size + addr);
+		if (delta < best_delta) {	/* tightest gap so far */
+			best_delta = delta;
+			best_addr = addr;
+			best_p = p;
+		}
+		addr = tmp->size + (unsigned long) tmp->addr;
+		if (addr > VMALLOC_END-size)
+			break;
+	}
+
+	if (!tmp || (tmp->addr > (void *)PAGE_ALIGN(VMALLOC_END))) {
+		/* check free area after list end */
+		delta = (unsigned long) PAGE_ALIGN(VMALLOC_END) - (size + addr);
+		if (delta < best_delta) {
+			best_delta = delta;
+			best_addr = addr;
+			best_p = p;
+		}
+	}
+	if (best_addr) {
+		area->flags = flags;
+		/* place the new area at the very end of the chosen gap */
+		area->addr = (void *)(best_addr + best_delta);
+		area->size = size;
+		area->next = *best_p;	/* link in before the following area */
+		area->pages = NULL;
+		area->nr_pages = 0;
+		area->phys_addr = 0;
+		*best_p = area;
+		/* check like in __vunmap */
+		WARN_ON((PAGE_SIZE - 1) & (unsigned long)area->addr);
+	} else {
+		kfree(area);
+		area = NULL;
+	}
+	write_unlock(&vmlist_lock);
+
+	return area;	/* NULL if no gap was large enough */
+}
+
 /* Caller must hold vmlist_lock */
 struct vm_struct *__remove_vm_area(void *addr)
 {
@@ -296,7 +363,7 @@ struct vm_struct *remove_vm_area(void *a
 	return v;
 }
 
-void __vunmap(void *addr, int deallocate_pages)
+void __vunmap(void *addr, int deallocate_pages, int uncharge)
 {
 	struct vm_struct *area;
 
@@ -320,6 +387,8 @@ void __vunmap(void *addr, int deallocate
 	if (deallocate_pages) {
 		int i;
 
+		if (uncharge)
+			dec_vmalloc_charged(area);
 		for (i = 0; i < area->nr_pages; i++) {
 			if (unlikely(!area->pages[i]))
 				BUG();
@@ -350,7 +419,7 @@ void __vunmap(void *addr, int deallocate
 void vfree(void *addr)
 {
 	BUG_ON(in_interrupt());
-	__vunmap(addr, 1);
+	__vunmap(addr, 1, 1);
 }
 EXPORT_SYMBOL(vfree);
 
@@ -367,7 +436,7 @@ EXPORT_SYMBOL(vfree);
 void vunmap(void *addr)
 {
 	BUG_ON(in_interrupt());
-	__vunmap(addr, 0);
+	__vunmap(addr, 0, 0);
 }
 EXPORT_SYMBOL(vunmap);
 
@@ -439,10 +508,12 @@ void *__vmalloc_area_node(struct vm_stru
 
 	if (map_vm_area(area, prot, &pages))
 		goto fail;
+
+	inc_vmalloc_charged(area, gfp_mask);
 	return area->addr;
 
 fail:
-	vfree(area->addr);
+	__vunmap(area->addr, 1, 0);
 	return NULL;
 }
 
@@ -486,6 +557,21 @@ void *__vmalloc(unsigned long size, gfp_
 }
 EXPORT_SYMBOL(__vmalloc);
 
+/* Allocate @size bytes (rounded up to whole pages) in an area chosen by
+ * get_vm_area_best().  Returns NULL for a zero or oversized request or
+ * when no area could be reserved. */
+static void *____vmalloc(unsigned long size, gfp_t mask, pgprot_t prot)
+{
+	struct vm_struct *vm;
+
+	size = PAGE_ALIGN(size);
+	if (size == 0 || (size >> PAGE_SHIFT) > num_physpages)
+		return NULL;
+	vm = get_vm_area_best(size, VM_ALLOC);
+	/* node -1: presumably "no NUMA node preference" -- confirm */
+	return vm ? __vmalloc_area_node(vm, mask, prot, -1) : NULL;
+}
+
 /**
  *	vmalloc  -  allocate virtually contiguous memory
  *
@@ -503,6 +589,26 @@ void *vmalloc(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc);
 
+void *ub_vmalloc(unsigned long size)
+{	/* __vmalloc() with GFP_KERNEL_UBC; presumably UBC-accounted */
+	return __vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+EXPORT_SYMBOL(ub_vmalloc);
+
+/* Allocate via ____vmalloc(), i.e. in an area from get_vm_area_best(). */
+void *vmalloc_best(unsigned long size)
+{
+	return ____vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+EXPORT_SYMBOL(vmalloc_best);
+
+/* Like vmalloc_best(), but allocates with the GFP_KERNEL_UBC mask. */
+void *ub_vmalloc_best(unsigned long size)
+{
+	return ____vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+EXPORT_SYMBOL(ub_vmalloc_best);
+
 /**
  *	vmalloc_node  -  allocate memory on a specific node
  *
@@ -521,6 +627,12 @@ void *vmalloc_node(unsigned long size, i
 }
 EXPORT_SYMBOL(vmalloc_node);
 
+void *ub_vmalloc_node(unsigned long size, int node)
+{	/* __vmalloc_node() on @node with the GFP_KERNEL_UBC mask */
+	return __vmalloc_node(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL, node);
+}
+EXPORT_SYMBOL(ub_vmalloc_node);
+
 #ifndef PAGE_KERNEL_EXEC
 # define PAGE_KERNEL_EXEC PAGE_KERNEL
 #endif
@@ -631,3 +743,37 @@ finished:
 	read_unlock(&vmlist_lock);
 	return buf - buf_start;
 }
+
+/* Print vmalloc address-space statistics gathered from a walk of vmlist:
+ * KB used, total KB in the vmalloc range, number of areas and the largest
+ * free gap. */
+void vprintstat(void)
+{
+	struct vm_struct *p;
+	unsigned long addr = VMALLOC_START;	/* end of the previous area */
+	unsigned long size = 0, free_size, max_free_size = 0;
+	int num = 0;
+
+	read_lock(&vmlist_lock);
+	for (p = vmlist; p; p = p->next) {
+		/* free gap between the previous area and this one */
+		free_size = (unsigned long)p->addr - addr;
+		if (free_size > max_free_size)
+			max_free_size = free_size;
+		addr = (unsigned long)p->addr + p->size;
+		size += p->size;
+		++num;
+	}
+	/* Trailing gap after the last area.  addr is still VMALLOC_START
+	 * when vmlist is empty, so the whole range is then reported as
+	 * free instead of a maximum gap of 0. */
+	free_size = VMALLOC_END - addr;
+	if (free_size > max_free_size)
+		max_free_size = free_size;
+	read_unlock(&vmlist_lock);
+
+	printk("VMALLOC Used: %luKB Total: %luKB Entries: %d\n"
+			"    Max_Free: %luKB Start: %lx End: %lx\n",
+			size/1024, (VMALLOC_END - VMALLOC_START)/1024, num,
+			max_free_size/1024, VMALLOC_START, VMALLOC_END);
+}
diff -upr linux-2.6.16.46-0.12.orig/mm/vmscan.c linux-2.6.16.46-0.12-027test011/mm/vmscan.c
--- linux-2.6.16.46-0.12.orig/mm/vmscan.c	2007-08-24 19:28:30.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/mm/vmscan.c	2007-08-28 17:35:31.000000000 +0400
@@ -34,6 +34,9 @@
 #include <linux/notifier.h>
 #include <linux/rwsem.h>
 
+#include <ub/ub_oom.h>
+#include <ub/io_acct.h>
+
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
 
@@ -337,6 +340,7 @@ static pageout_t pageout(struct page *pa
 		 */
 		if (PagePrivate(page)) {
 			if (try_to_free_buffers(page)) {
+				ub_io_release_context(page, 0);
 				ClearPageDirty(page);
 				printk("%s: orphaned page\n", __FUNCTION__);
 				return PAGE_CLEAN;
@@ -444,10 +448,10 @@ static int shrink_list(struct list_head 
 		BUG_ON(PageActive(page));
 
 		sc->nr_scanned++;
-
+#ifdef CONFIG_PAGE_STATES
 		if (page_host_discards() && unlikely(PageDiscarded(page)))
 			goto free_it;
-
+#endif
 		if (!sc->may_swap && page_mapped(page))
 			goto keep_locked;
 
@@ -1172,8 +1176,11 @@ static void shrink_cache(struct zone *zo
 			 * Only readd the page to lru list if it has not
 			 * been discarded.
 			 */
-			if (!page_host_discards() ||
-			    likely(!PageDiscarded(page))) {
+			if (!page_host_discards()
+#ifdef CONFIG_PAGE_STATES
+					|| likely(!PageDiscarded(page))
+#endif
+					) {
 				if (TestSetPageLRU(page))
 					BUG();
 				if (PageActive(page))
@@ -1275,6 +1282,7 @@ force_reclaim_mapped:
 			reclaim_mapped = 1;
 	}
 
+	{KSTAT_PERF_ENTER(refill_inact)
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
@@ -1308,7 +1316,11 @@ force_reclaim_mapped:
 		 * Only readd the page to lru list if it has not
 		 * been discarded.
 		 */
-		if (!page_host_discards() || likely(!PageDiscarded(page))) {
+		if (!page_host_discards()
+#ifdef CONFIG_PAGE_STATES
+				|| likely(!PageDiscarded(page))
+#endif
+				) {
 			if (TestSetPageLRU(page))
 				BUG();
 			if (!TestClearPageActive(page))
@@ -1346,7 +1358,11 @@ force_reclaim_mapped:
 		 * Only readd the page to lru list if it has not
 		 * been discarded.
 		 */
- 		if (!page_host_discards() || likely(!PageDiscarded(page))) {
+ 		if (!page_host_discards()
+#ifdef CONFIG_PAGE_STATES
+				|| likely(!PageDiscarded(page))
+#endif
+				) {
 			if (TestSetPageLRU(page))
 				BUG();
 			BUG_ON(!PageActive(page));
@@ -1373,6 +1389,7 @@ force_reclaim_mapped:
 	local_irq_enable();
 
 	pagevec_release(&pvec);
+	KSTAT_PERF_LEAVE(refill_inact)}
 }
 
 /*
@@ -1447,6 +1464,7 @@ shrink_caches(struct zone **zones, struc
 	int all_unreclaimable = 1;
 	int i;
 
+	ub_oom_start();
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *zone = zones[i];
 
@@ -1495,6 +1513,7 @@ int try_to_free_pages(struct zone **zone
 	unsigned long lru_pages = 0;
 	int i;
 
+	KSTAT_PERF_ENTER(ttfp);
 	sc.gfp_mask = gfp_mask;
 	sc.may_writepage = !laptop_mode;
 	sc.may_swap = 1;
@@ -1564,6 +1583,7 @@ out:
 
 		zone->prev_priority = zone->temp_priority;
 	}
+	KSTAT_PERF_LEAVE(ttfp);
 	return ret;
 }
 
@@ -1897,7 +1917,8 @@ static int __init kswapd_init(void)
 	swap_setup();
 	for_each_pgdat(pgdat)
 		pgdat->kswapd
-		= find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
+		= find_task_by_pid_all(kernel_thread(kswapd,
+					pgdat, CLONE_KERNEL));
 	total_memory = nr_free_pagecache_pages();
 	hotcpu_notifier(cpu_callback, 0);
 	return 0;
diff -upr linux-2.6.16.46-0.12.orig/net/8021q/vlan.c linux-2.6.16.46-0.12-027test011/net/8021q/vlan.c
--- linux-2.6.16.46-0.12.orig/net/8021q/vlan.c	2007-08-24 19:28:21.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/8021q/vlan.c	2007-08-28 17:35:32.000000000 +0400
@@ -31,6 +31,8 @@
 #include <net/arp.h>
 #include <linux/rtnetlink.h>
 #include <linux/notifier.h>
+#include <linux/ve_proto.h>
+#include <linux/ve.h>
 
 #include <linux/if_vlan.h>
 #include "vlan.h"
@@ -71,6 +73,44 @@ static struct packet_type vlan_packet_ty
 #define VLAN_LINK_STATE_MASK \
 	((1<<__LINK_STATE_PRESENT)|(1<<__LINK_STATE_NOCARRIER))
 
+#ifdef CONFIG_VE
+/* Per-VE init hook: set up the VLAN proc entries via vlan_proc_init()
+ * and, on success, take a module reference that vlan_stop() drops.
+ * Returns 0 or the negative error from vlan_proc_init(). */
+static int vlan_start(void *data)
+{
+	int rc;
+
+	rc = vlan_proc_init();
+	if (rc < 0)
+		return rc;
+
+	__module_get(THIS_MODULE);
+	return 0;
+}
+
+/* Per-VE fini hook: tear down the VLAN proc entries of the VE passed in
+ * @data, if vlan_start() created them, and drop the module reference. */
+static void vlan_stop(void *data)
+{
+	struct ve_struct *ve = data;
+
+	if (!ve->_proc_vlan_dir)
+		return;
+	vlan_proc_cleanup();
+	ve->_proc_vlan_conf = NULL;
+	ve->_proc_vlan_dir = NULL;
+	module_put(THIS_MODULE);	/* pairs with vlan_start() */
+}
+
+static struct ve_hook vlan_ve_hook = {
+	.init		= vlan_start,	/* sets up the VLAN proc entries */
+	.fini		= vlan_stop,	/* tears them down again */
+	.owner		= THIS_MODULE,
+	.priority	= HOOK_PRIO_NET_POST,	/* presumably: after net init */
+};
+#endif
+
 /* End of global variables definitions. */
 
 /*
@@ -108,6 +148,7 @@ static int __init vlan_proto_init(void)
 	}
 
 	vlan_ioctl_set(vlan_ioctl_handler);
+	ve_hook_register(VE_SS_CHAIN, &vlan_ve_hook);
 
 	return 0;
 }
@@ -120,6 +161,8 @@ static void __exit vlan_cleanup_devices(
 {
 	struct net_device *dev, *nxt;
 
+	ve_hook_unregister(&vlan_ve_hook);
+
 	rtnl_lock();
 	for (dev = dev_base; dev; dev = nxt) {
 		nxt = dev->next;
@@ -164,14 +207,16 @@ module_init(vlan_proto_init);
 module_exit(vlan_cleanup_module);
 
 /* Must be invoked with RCU read lock (no preempt) */
-static struct vlan_group *__vlan_find_group(int real_dev_ifindex)
+static struct vlan_group *__vlan_find_group(int real_dev_ifindex,
+		struct ve_struct *ve)
 {
 	struct vlan_group *grp;
 	struct hlist_node *n;
 	int hash = vlan_grp_hashfn(real_dev_ifindex);
 
 	hlist_for_each_entry_rcu(grp, n, &vlan_group_hash[hash], hlist) {
-		if (grp->real_dev_ifindex == real_dev_ifindex)
+		if (grp->real_dev_ifindex == real_dev_ifindex &&
+				ve_accessible_strict(ve, grp->owner))
 			return grp;
 	}
 
@@ -185,7 +230,8 @@ static struct vlan_group *__vlan_find_gr
 struct net_device *__find_vlan_dev(struct net_device *real_dev,
 				   unsigned short VID)
 {
-	struct vlan_group *grp = __vlan_find_group(real_dev->ifindex);
+	struct vlan_group *grp = __vlan_find_group(real_dev->ifindex,
+			real_dev->owner_env);
 
 	if (grp)
                 return grp->vlan_devices[VID];
@@ -222,7 +268,7 @@ static int unregister_vlan_dev(struct ne
 		return -EINVAL;
 
 	ASSERT_RTNL();
-	grp = __vlan_find_group(real_dev_ifindex);
+	grp = __vlan_find_group(real_dev_ifindex, real_dev->owner_env);
 
 	ret = 0;
 
@@ -264,6 +310,9 @@ static int unregister_vlan_dev(struct ne
 
 				hlist_del_rcu(&grp->hlist);
 
+				put_ve(grp->owner);
+				grp->owner = NULL;
+
 				/* Free the group, after all cpu's are done. */
 				call_rcu(&grp->rcu, vlan_rcu_free);
 
@@ -342,6 +391,8 @@ static void vlan_setup(struct net_device
 	new_dev->set_multicast_list = vlan_dev_set_multicast_list;
 	new_dev->destructor = free_netdev;
 	new_dev->do_ioctl = vlan_dev_ioctl;
+	if (!ve_is_super(get_exec_env()))
+		new_dev->features |= NETIF_F_VIRTUAL;
 }
 
 /*  Attach a VLAN device to a mac address (ie Ethernet Card).
@@ -504,19 +555,20 @@ static struct net_device *register_vlan_
 	/* So, got the sucker initialized, now lets place
 	 * it into our local structure.
 	 */
-	grp = __vlan_find_group(real_dev->ifindex);
+	grp = __vlan_find_group(real_dev->ifindex, real_dev->owner_env);
 
 	/* Note, we are running under the RTNL semaphore
 	 * so it cannot "appear" on us.
 	 */
 	if (!grp) { /* need to add a new group */
-		grp = kmalloc(sizeof(struct vlan_group), GFP_KERNEL);
+		grp = kmalloc(sizeof(struct vlan_group), GFP_KERNEL_UBC);
 		if (!grp)
 			goto out_free_unregister;
 					
 		/* printk(KERN_ALERT "VLAN REGISTER:  Allocated new group.\n"); */
 		memset(grp, 0, sizeof(struct vlan_group));
 		grp->real_dev_ifindex = real_dev->ifindex;
+		grp->owner = get_ve(real_dev->owner_env);
 
 		hlist_add_head_rcu(&grp->hlist, 
 				   &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]);
@@ -562,10 +614,12 @@ out_ret_null:
 static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
-	struct vlan_group *grp = __vlan_find_group(dev->ifindex);
+	struct vlan_group *grp;
 	int i, flgs;
 	struct net_device *vlandev;
+	struct ve_struct *env;
 
+	grp = __vlan_find_group(dev->ifindex, dev->owner_env);
 	if (!grp)
 		goto out;
 
@@ -640,7 +694,9 @@ static int vlan_device_event(struct noti
 			ret = unregister_vlan_dev(dev,
 						  VLAN_DEV_INFO(vlandev)->vlan_id);
 
+			env = set_exec_env(vlandev->owner_env);
 			unregister_netdevice(vlandev);
+			set_exec_env(env);
 
 			/* Group was destroyed? */
 			if (ret == 1)
@@ -653,6 +709,15 @@ out:
 	return NOTIFY_DONE;
 }
 
+static inline int vlan_check_caps(void)
+{
+#ifdef CONFIG_VE	/* VE kernels also accept CAP_VE_NET_ADMIN */
+	return capable(CAP_NET_ADMIN) || capable(CAP_VE_NET_ADMIN);
+#else
+	return capable(CAP_NET_ADMIN);
+#endif
+}
+
 /*
  *	VLAN IOCTL handler.
  *	o execute requested action or pass command to the device driver
@@ -677,7 +742,7 @@ static int vlan_ioctl_handler(void __use
 
 	switch (args.cmd) {
 	case SET_VLAN_INGRESS_PRIORITY_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		err = vlan_dev_set_ingress_priority(args.device1,
 						    args.u.skb_priority,
@@ -685,7 +750,7 @@ static int vlan_ioctl_handler(void __use
 		break;
 
 	case SET_VLAN_EGRESS_PRIORITY_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		err = vlan_dev_set_egress_priority(args.device1,
 						   args.u.skb_priority,
@@ -693,7 +758,7 @@ static int vlan_ioctl_handler(void __use
 		break;
 
 	case SET_VLAN_FLAG_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		err = vlan_dev_set_vlan_flag(args.device1,
 					     args.u.flag,
@@ -701,7 +766,7 @@ static int vlan_ioctl_handler(void __use
 		break;
 
 	case SET_VLAN_NAME_TYPE_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		if ((args.u.name_type >= 0) &&
 		    (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
@@ -713,7 +778,7 @@ static int vlan_ioctl_handler(void __use
 		break;
 
 	case ADD_VLAN_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		/* we have been given the name of the Ethernet Device we want to
 		 * talk to:  args.dev1	 We also have the
@@ -727,7 +792,7 @@ static int vlan_ioctl_handler(void __use
 		break;
 
 	case DEL_VLAN_CMD:
-		if (!capable(CAP_NET_ADMIN))
+		if (!vlan_check_caps())
 			return -EPERM;
 		/* Here, the args.dev1 is the actual VLAN we want
 		 * to get rid of.
diff -upr linux-2.6.16.46-0.12.orig/net/8021q/vlan_dev.c linux-2.6.16.46-0.12-027test011/net/8021q/vlan_dev.c
--- linux-2.6.16.46-0.12.orig/net/8021q/vlan_dev.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/8021q/vlan_dev.c	2007-08-28 17:35:32.000000000 +0400
@@ -438,6 +438,7 @@ int vlan_dev_hard_header(struct sk_buff 
 
 int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct ve_struct *env;
 	struct net_device_stats *stats = vlan_dev_get_stats(dev);
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
 
@@ -491,13 +492,17 @@ int vlan_dev_hard_start_xmit(struct sk_b
 	stats->tx_bytes += skb->len;
 
 	skb->dev = VLAN_DEV_INFO(dev)->real_dev;
+	skb->owner_env = skb->dev->owner_env;
+	env = set_exec_env(skb->owner_env);
 	dev_queue_xmit(skb);
+	set_exec_env(env);
 
 	return 0;
 }
 
 int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct ve_struct *env;
 	struct net_device_stats *stats = vlan_dev_get_stats(dev);
 	unsigned short veth_TCI;
 
@@ -515,7 +520,10 @@ int vlan_dev_hwaccel_hard_start_xmit(str
 	stats->tx_bytes += skb->len;
 
 	skb->dev = VLAN_DEV_INFO(dev)->real_dev;
+	skb->owner_env = skb->dev->owner_env;
+	env = set_exec_env(skb->owner_env);
 	dev_queue_xmit(skb);
+	set_exec_env(env);
 
 	return 0;
 }
diff -upr linux-2.6.16.46-0.12.orig/net/8021q/vlanproc.c linux-2.6.16.46-0.12-027test011/net/8021q/vlanproc.c
--- linux-2.6.16.46-0.12.orig/net/8021q/vlanproc.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/8021q/vlanproc.c	2007-08-28 17:35:32.000000000 +0400
@@ -115,13 +115,21 @@ static struct file_operations vlandev_fo
  *	/proc/net/vlan 
  */
 
+#ifdef CONFIG_VE
+#define proc_vlan_dir	(get_exec_env()->_proc_vlan_dir)
+#else
 static struct proc_dir_entry *proc_vlan_dir;
+#endif
 
 /*
  *	/proc/net/vlan/config 
  */
 
+#ifdef CONFIG_VE
+#define proc_vlan_conf	(get_exec_env()->_proc_vlan_conf)
+#else
 static struct proc_dir_entry *proc_vlan_conf;
+#endif
 
 /* Strings */
 static const char *vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = {
@@ -155,7 +163,7 @@ void vlan_proc_cleanup(void)
  *	Create /proc/net/vlan entries
  */
 
-int __init vlan_proc_init(void)
+int vlan_proc_init(void)
 {
 	proc_vlan_dir = proc_mkdir(name_root, proc_net);
 	if (proc_vlan_dir) {
diff -upr linux-2.6.16.46-0.12.orig/net/bridge/br.c linux-2.6.16.46-0.12-027test011/net/bridge/br.c
--- linux-2.6.16.46-0.12.orig/net/bridge/br.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/bridge/br.c	2007-08-28 17:35:30.000000000 +0400
@@ -34,6 +34,7 @@ static int __init br_init(void)
 #endif
 	brioctl_set(br_ioctl_deviceless_stub);
 	br_handle_frame_hook = br_handle_frame;
+	br_hard_xmit_hook = br_xmit;
 
 	br_fdb_get_hook = br_fdb_get;
 	br_fdb_put_hook = br_fdb_put;
@@ -59,6 +60,7 @@ static void __exit br_deinit(void)
 	br_fdb_put_hook = NULL;
 
 	br_handle_frame_hook = NULL;
+	br_hard_xmit_hook = NULL;
 	br_fdb_fini();
 }
 
diff -upr linux-2.6.16.46-0.12.orig/net/bridge/br_device.c linux-2.6.16.46-0.12-027test011/net/bridge/br_device.c
--- linux-2.6.16.46-0.12.orig/net/bridge/br_device.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/bridge/br_device.c	2007-08-28 17:35:32.000000000 +0400
@@ -39,11 +39,13 @@ int br_dev_xmit(struct sk_buff *skb, str
 	skb->mac.raw = skb->data;
 	skb_pull(skb, ETH_HLEN);
 
+	skb->brmark = BR_ALREADY_SEEN;
+
 	rcu_read_lock();
 	if (dest[0] & 1) 
 		br_flood_deliver(br, skb, 0);
 	else if ((dst = __br_fdb_get(br, dest)) != NULL)
-		br_deliver(dst->dst, skb);
+		br_deliver(dst->dst, skb, 1);
 	else
 		br_flood_deliver(br, skb, 0);
 
@@ -51,6 +53,35 @@ int br_dev_xmit(struct sk_buff *skb, str
 	return 0;
 }
 
+int br_xmit(struct sk_buff *skb, struct net_bridge_port *port)
+{
+	struct net_bridge *br = port->br;
+	const unsigned char *dest = skb->data;
+	struct net_bridge_fdb_entry *dst;
+
+	if (!br->via_phys_dev)
+		return 0;	/* via_phys_dev mode is off: nothing to do */
+
+	br->statistics.tx_packets++;
+	br->statistics.tx_bytes += skb->len;
+
+	skb->mac.raw = skb->data;
+	skb_pull(skb, ETH_HLEN);
+
+	skb->brmark = BR_ALREADY_SEEN;
+
+	if (dest[0] & 1)	/* group (multicast/broadcast) destination */
+		br_xmit_deliver(br, port, skb);
+	else if ((dst = __br_fdb_get(br, dest)) != NULL)
+		br_deliver(dst->dst, skb, 0);	/* 0: send a clone */
+	else
+		br_xmit_deliver(br, port, skb);
+
+	skb_push(skb, ETH_HLEN);	/* undo the skb_pull() above */
+
+	return 0;
+}
+
 static int br_dev_open(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
diff -upr linux-2.6.16.46-0.12.orig/net/bridge/br_forward.c linux-2.6.16.46-0.12-027test011/net/bridge/br_forward.c
--- linux-2.6.16.46-0.12.orig/net/bridge/br_forward.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/bridge/br_forward.c	2007-08-28 17:35:32.000000000 +0400
@@ -79,14 +79,24 @@ static void __br_forward(const struct ne
 }
 
 /* called with rcu_read_lock */
-void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
+void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb, int free)
 {
 	if (should_deliver(to, skb)) {
+		if (!free) {
+			struct sk_buff *skb2;
+
+			if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+				to->br->statistics.tx_dropped++;
+				return;
+			}
+			skb = skb2;
+		}
 		__br_deliver(to, skb);
 		return;
 	}
 
-	kfree_skb(skb);
+	if (free)
+		kfree_skb(skb);
 }
 
 /* called with rcu_read_lock */
@@ -102,7 +112,7 @@ void br_forward(const struct net_bridge_
 
 /* called under bridge lock */
 static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone,
-	void (*__packet_hook)(const struct net_bridge_port *p, 
+	int free, void (*__packet_hook)(const struct net_bridge_port *p,
 			      struct sk_buff *skb))
 {
 	struct net_bridge_port *p;
@@ -144,18 +154,41 @@ static void br_flood(struct net_bridge *
 		return;
 	}
 
-	kfree_skb(skb);
+	if (free)
+		kfree_skb(skb);
 }
 
 
 /* called with rcu_read_lock */
 void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, int clone)
 {
-	br_flood(br, skb, clone, __br_deliver);
+	br_flood(br, skb, clone, 1, __br_deliver);
+}
+
+/* called with rcu_read_lock; sends a clone of skb to each port except @port */
+void br_xmit_deliver(struct net_bridge *br, struct net_bridge_port *port,
+						struct sk_buff *skb)
+{
+	struct net_bridge_port *p;
+
+	list_for_each_entry_rcu(p, &br->port_list, list) {
+		if (p == port)
+			continue;
+		if (should_deliver(p, skb)) {
+			struct sk_buff *skb2;
+
+			if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+				br->statistics.tx_dropped++;
+				return;	/* clone failed: stop flooding */
+			}
+			__br_deliver(p, skb2);
+		}
+	}
 }
 
 /* called under bridge lock */
 void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone)
 {
-	br_flood(br, skb, clone, __br_forward);
+	skb->brmark = BR_ALREADY_SEEN;
+	br_flood(br, skb, clone, 1, __br_forward);
 }
diff -upr linux-2.6.16.46-0.12.orig/net/bridge/br_if.c linux-2.6.16.46-0.12-027test011/net/bridge/br_if.c
--- linux-2.6.16.46-0.12.orig/net/bridge/br_if.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/bridge/br_if.c	2007-08-28 17:35:36.000000000 +0400
@@ -77,22 +77,15 @@ static int port_cost(struct net_device *
  * Called from work queue to allow for calling functions that
  * might sleep (such as speed check), and to debounce.
  */
-static void port_carrier_check(void *arg)
+void br_port_carrier_check(struct net_bridge_port *p)
 {
-	struct net_device *dev = arg;
-	struct net_bridge_port *p;
-	struct net_bridge *br;
-
-	rtnl_lock();
-	p = dev->br_port;
-	if (!p)
-		goto done;
-	br = p->br;
+	struct net_device *dev = p->dev;
+	struct net_bridge *br = p->br;
 
 	if (netif_carrier_ok(dev))
 		p->path_cost = port_cost(dev);
 
-	if (br->dev->flags & IFF_UP) {
+	if (netif_running(br->dev)) {
 		spin_lock_bh(&br->lock);
 		if (netif_carrier_ok(dev)) {
 			if (p->state == BR_STATE_DISABLED)
@@ -103,8 +96,6 @@ static void port_carrier_check(void *arg
 		}
 		spin_unlock_bh(&br->lock);
 	}
-done:
-	rtnl_unlock();
 }
 
 static void release_nbp(struct kobject *kobj)
@@ -157,8 +148,6 @@ static void del_nbp(struct net_bridge_po
 
 	dev_set_promiscuity(dev, -1);
 
-	cancel_delayed_work(&p->carrier_check);
-
 	spin_lock_bh(&br->lock);
 	br_stp_disable_port(p);
 	spin_unlock_bh(&br->lock);
@@ -180,6 +169,11 @@ static void del_br(struct net_bridge *br
 {
 	struct net_bridge_port *p, *n;
 
+	if (br->master_dev) {
+		dev_put(br->master_dev);
+		br->master_dev = NULL;
+	}
+
 	list_for_each_entry_safe(p, n, &br->port_list, list) {
 		del_nbp(p);
 	}
@@ -277,7 +271,6 @@ static struct net_bridge_port *new_nbp(s
 	p->port_no = index;
 	br_init_port(p);
 	p->state = BR_STATE_DISABLED;
-	INIT_WORK(&p->carrier_check, port_carrier_check, dev);
 	br_stp_port_timer_init(p);
 
 	kobject_init(&p->kobj);
@@ -448,10 +441,18 @@ int br_add_if(struct net_bridge *br, str
 	spin_lock_bh(&br->lock);
 	br_stp_recalculate_bridge_id(br);
 	br_features_recompute(br);
-	schedule_delayed_work(&p->carrier_check, BR_PORT_DEBOUNCE);
+	if (!(dev->features & NETIF_F_VIRTUAL)) {
+		dev_hold(dev);
+		if (br->master_dev)	/* drop the ref held on the old master */
+			dev_put(br->master_dev);
+		br->master_dev = dev;
+	}
+	if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
+	    (br->dev->flags & IFF_UP))
+		br_stp_enable_port(p);
 	spin_unlock_bh(&br->lock);
 
 	dev_set_mtu(br->dev, br_min_mtu(br));
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
 	return 0;
@@ -477,6 +478,16 @@ int br_del_if(struct net_bridge *br, str
 	spin_lock_bh(&br->lock);
 	br_stp_recalculate_bridge_id(br);
 	br_features_recompute(br);
+	if (br->master_dev == dev) {
+		br->master_dev = NULL;
+		dev_put(dev);
+		list_for_each_entry(p, &br->port_list, list)
+			if (!(p->dev->features & NETIF_F_VIRTUAL)) {
+				dev_hold(p->dev);
+				br->master_dev = p->dev;
+				break;
+			}
+	}
 	spin_unlock_bh(&br->lock);
 
 	return 0;
diff -upr linux-2.6.16.46-0.12.orig/net/bridge/br_input.c linux-2.6.16.46-0.12-027test011/net/bridge/br_input.c
--- linux-2.6.16.46-0.12.orig/net/bridge/br_input.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/bridge/br_input.c	2007-08-28 17:35:32.000000000 +0400
@@ -29,13 +29,20 @@ static int br_pass_frame_up_finish(struc
 
 static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb)
 {
-	struct net_device *indev;
+	struct net_device *indev, *outdev;
 
 	br->statistics.rx_packets++;
 	br->statistics.rx_bytes += skb->len;
 
 	indev = skb->dev;
-	skb->dev = br->dev;
+	if (!br->via_phys_dev)
+		skb->dev = br->dev;
+	else {
+		skb->brmark = BR_ALREADY_SEEN;
+		outdev = br->master_dev;
+		if (outdev)
+			skb->dev = outdev;
+	}
 
 	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
 			br_pass_frame_up_finish);
@@ -60,7 +67,7 @@ int br_handle_frame_finish(struct sk_buf
 	if (p->state == BR_STATE_LEARNING)
 		goto drop;
 
-	if (br->dev->flags & IFF_PROMISC) {
+	if ((br->dev->flags & IFF_PROMISC) && !br->via_phys_dev) {
 		struct sk_buff *skb2;
 
 		skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -135,6 +142,8 @@ int br_handle_frame(struct net_bridge_po
 			skb = *pskb;
 			dest = eth_hdr(skb)->h_dest;
 		}
+		if ((*pskb)->brmark == BR_ALREADY_SEEN)
+			return 0;
 
 		if (!compare_ether_addr(p->br->dev->dev_addr, dest))
 			skb->pkt_type = PACKET_HOST;
diff -upr linux-2.6.16.46-0.12.orig/net/bridge/br_ioctl.c linux-2.6.16.46-0.12-027test011/net/bridge/br_ioctl.c
--- linux-2.6.16.46-0.12.orig/net/bridge/br_ioctl.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/bridge/br_ioctl.c	2007-08-28 17:35:32.000000000 +0400
@@ -138,6 +138,7 @@ static int old_dev_ioctl(struct net_devi
 		b.topology_change_detected = br->topology_change_detected;
 		b.root_port = br->root_port;
 		b.stp_enabled = br->stp_enabled;
+		b.via_phys_dev = br->via_phys_dev;
 		b.ageing_time = jiffies_to_clock_t(br->ageing_time);
 		b.hello_timer_value = br_timer_value(&br->hello_timer);
 		b.tcn_timer_value = br_timer_value(&br->tcn_timer);
@@ -256,6 +257,13 @@ static int old_dev_ioctl(struct net_devi
 		br->stp_enabled = args[1]?1:0;
 		return 0;
 
+	case BRCTL_SET_VIA_ORIG_DEV:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		br->via_phys_dev = args[1] ? 1 : 0;
+		return 0;
+
 	case BRCTL_SET_BRIDGE_PRIORITY:
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
diff -upr linux-2.6.16.46-0.12.orig/net/bridge/br_notify.c linux-2.6.16.46-0.12-027test011/net/bridge/br_notify.c
--- linux-2.6.16.46-0.12.orig/net/bridge/br_notify.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/bridge/br_notify.c	2007-08-28 17:35:36.000000000 +0400
@@ -41,48 +41,47 @@ static int br_device_event(struct notifi
 
 	br = p->br;
 
-	spin_lock_bh(&br->lock);
 	switch (event) {
 	case NETDEV_CHANGEMTU:
 		dev_set_mtu(br->dev, br_min_mtu(br));
 		break;
 
 	case NETDEV_CHANGEADDR:
+		spin_lock_bh(&br->lock);
 		br_fdb_changeaddr(p, dev->dev_addr);
 		br_stp_recalculate_bridge_id(br);
+		spin_unlock_bh(&br->lock);
 		break;
 
 	case NETDEV_CHANGE:
-		if (br->dev->flags & IFF_UP)
-			schedule_delayed_work(&p->carrier_check, BR_PORT_DEBOUNCE);
+		br_port_carrier_check(p);
 		break;
 
 	case NETDEV_FEAT_CHANGE:
-		if (br->dev->flags & IFF_UP) 
+		spin_lock_bh(&br->lock);
+		if (netif_running(br->dev))
 			br_features_recompute(br);
-
-		/* could do recursive feature change notification
-		 * but who would care?? 
-		 */
+		spin_unlock_bh(&br->lock);
 		break;
 
 	case NETDEV_DOWN:
+		spin_lock_bh(&br->lock);
 		if (br->dev->flags & IFF_UP)
 			br_stp_disable_port(p);
+		spin_unlock_bh(&br->lock);
 		break;
 
 	case NETDEV_UP:
+		spin_lock_bh(&br->lock);
 		if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) 
 			br_stp_enable_port(p);
+		spin_unlock_bh(&br->lock);
 		break;
 
 	case NETDEV_UNREGISTER:
-		spin_unlock_bh(&br->lock);
 		br_del_if(br, dev);
-		goto done;
+		break;
 	} 
-	spin_unlock_bh(&br->lock);
 
- done:
 	return NOTIFY_DONE;
 }
diff -upr linux-2.6.16.46-0.12.orig/net/bridge/br_private.h linux-2.6.16.46-0.12-027test011/net/bridge/br_private.h
--- linux-2.6.16.46-0.12.orig/net/bridge/br_private.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/bridge/br_private.h	2007-08-28 17:35:36.000000000 +0400
@@ -27,8 +27,6 @@
 #define BR_PORT_BITS	10
 #define BR_MAX_PORTS	(1<<BR_PORT_BITS)
 
-#define BR_PORT_DEBOUNCE (HZ/10)
-
 #define BR_VERSION	"2.1"
 
 typedef struct bridge_id bridge_id;
@@ -82,7 +80,6 @@ struct net_bridge_port
 	struct timer_list		hold_timer;
 	struct timer_list		message_age_timer;
 	struct kobject			kobj;
-	struct work_struct		carrier_check;
 	struct rcu_head			rcu;
 };
 
@@ -91,6 +88,8 @@ struct net_bridge
 	spinlock_t			lock;
 	struct list_head		port_list;
 	struct net_device		*dev;
+	struct net_device		*master_dev;
+	unsigned char			via_phys_dev;
 	struct net_device_stats		statistics;
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
@@ -134,6 +133,7 @@ static inline int br_is_root_bridge(cons
 /* br_device.c */
 extern void br_dev_setup(struct net_device *dev);
 extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
+extern int br_xmit(struct sk_buff *skb, struct net_bridge_port *port);
 
 /* br_fdb.c */
 extern void br_fdb_init(void);
@@ -159,7 +159,7 @@ extern void br_fdb_update(struct net_bri
 
 /* br_forward.c */
 extern void br_deliver(const struct net_bridge_port *to,
-		struct sk_buff *skb);
+		struct sk_buff *skb, int free);
 extern int br_dev_queue_push_xmit(struct sk_buff *skb);
 extern void br_forward(const struct net_bridge_port *to,
 		struct sk_buff *skb);
@@ -167,11 +167,15 @@ extern int br_forward_finish(struct sk_b
 extern void br_flood_deliver(struct net_bridge *br,
 		      struct sk_buff *skb,
 		      int clone);
+extern void br_xmit_deliver(struct net_bridge *br,
+		      struct net_bridge_port *port,
+		      struct sk_buff *skb);
 extern void br_flood_forward(struct net_bridge *br,
 		      struct sk_buff *skb,
 		      int clone);
 
 /* br_if.c */
+extern void br_port_carrier_check(struct net_bridge_port *p);
 extern int br_add_bridge(const char *name);
 extern int br_del_bridge(const char *name);
 extern void br_cleanup_bridges(void);
diff -upr linux-2.6.16.46-0.12.orig/net/bridge/br_stp_if.c linux-2.6.16.46-0.12-027test011/net/bridge/br_stp_if.c
--- linux-2.6.16.46-0.12.orig/net/bridge/br_stp_if.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/bridge/br_stp_if.c	2007-08-28 17:35:30.000000000 +0400
@@ -120,7 +120,9 @@ void br_stp_disable_port(struct net_brid
 /* called under bridge lock */
 void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
 {
-	unsigned char oldaddr[6];
+	/* should be aligned on 2 bytes for compare_ether_addr() */
+	unsigned short oldaddr_aligned[ETH_ALEN >> 1];
+	unsigned char *oldaddr = (unsigned char *)oldaddr_aligned;
 	struct net_bridge_port *p;
 	int wasroot;
 
@@ -145,11 +147,14 @@ void br_stp_change_bridge_id(struct net_
 		br_become_root_bridge(br);
 }
 
-static const unsigned char br_mac_zero[6];
+/* should be aligned on 2 bytes for compare_ether_addr() */
+static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1];
 
 /* called under bridge lock */
 void br_stp_recalculate_bridge_id(struct net_bridge *br)
 {
+	const unsigned char *br_mac_zero =
+			(const unsigned char *)br_mac_zero_aligned;
 	const unsigned char *addr = br_mac_zero;
 	struct net_bridge_port *p;
 
diff -upr linux-2.6.16.46-0.12.orig/net/bridge/br_sysfs_br.c linux-2.6.16.46-0.12-027test011/net/bridge/br_sysfs_br.c
--- linux-2.6.16.46-0.12.orig/net/bridge/br_sysfs_br.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/bridge/br_sysfs_br.c	2007-08-28 17:35:32.000000000 +0400
@@ -157,6 +157,26 @@ static ssize_t store_stp_state(struct cl
 static CLASS_DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state,
 			 store_stp_state);
 
+static ssize_t show_via_phys_dev_state(struct class_device *cd, char *buf)
+{
+	struct net_bridge *br = to_bridge(cd);
+	return sprintf(buf, "%d\n", br->via_phys_dev);
+}
+
+static void set_via_phys_dev_state(struct net_bridge *br, unsigned long val)
+{
+	br->via_phys_dev = val ? 1 : 0;	/* unsigned char flag: keep it 0/1 */
+}
+
+static ssize_t store_via_phys_dev_state(struct class_device *cd,
+			       const char *buf, size_t len)
+{
+	return store_bridge_parm(cd, buf, len, set_via_phys_dev_state);
+}
+
+static CLASS_DEVICE_ATTR(via_phys_dev, S_IRUGO | S_IWUSR, show_via_phys_dev_state,
+			 store_via_phys_dev_state);
+
 static ssize_t show_priority(struct class_device *cd, char *buf)
 {
 	struct net_bridge *br = to_bridge(cd);
@@ -248,6 +268,7 @@ static struct attribute *bridge_attrs[] 
 	&class_device_attr_max_age.attr,
 	&class_device_attr_ageing_time.attr,
 	&class_device_attr_stp_state.attr,
+	&class_device_attr_via_phys_dev.attr,
 	&class_device_attr_priority.attr,
 	&class_device_attr_bridge_id.attr,
 	&class_device_attr_root_id.attr,
diff -upr linux-2.6.16.46-0.12.orig/net/bridge/netfilter/ebt_among.c linux-2.6.16.46-0.12-027test011/net/bridge/netfilter/ebt_among.c
--- linux-2.6.16.46-0.12.orig/net/bridge/netfilter/ebt_among.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/bridge/netfilter/ebt_among.c	2007-08-28 17:35:30.000000000 +0400
@@ -176,7 +176,7 @@ static int ebt_among_check(const char *t
 			   unsigned int datalen)
 {
 	struct ebt_among_info *info = (struct ebt_among_info *) data;
-	int expected_length = sizeof(struct ebt_among_info);
+	int expected_length = EBT_ALIGN(sizeof(struct ebt_among_info));
 	const struct ebt_mac_wormhash *wh_dst, *wh_src;
 	int err;
 
@@ -185,7 +185,7 @@ static int ebt_among_check(const char *t
 	expected_length += ebt_mac_wormhash_size(wh_dst);
 	expected_length += ebt_mac_wormhash_size(wh_src);
 
-	if (datalen != EBT_ALIGN(expected_length)) {
+	if (datalen != expected_length) {
 		printk(KERN_WARNING
 		       "ebtables: among: wrong size: %d"
 		       "against expected %d, rounded to %Zd\n",
diff -upr linux-2.6.16.46-0.12.orig/net/compat.c linux-2.6.16.46-0.12-027test011/net/compat.c
--- linux-2.6.16.46-0.12.orig/net/compat.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/compat.c	2007-08-28 17:35:29.000000000 +0400
@@ -308,107 +308,6 @@ void scm_detach_fds_compat(struct msghdr
 }
 
 /*
- * For now, we assume that the compatibility and native version
- * of struct ipt_entry are the same - sfr.  FIXME
- */
-struct compat_ipt_replace {
-	char			name[IPT_TABLE_MAXNAMELEN];
-	u32			valid_hooks;
-	u32			num_entries;
-	u32			size;
-	u32			hook_entry[NF_IP_NUMHOOKS];
-	u32			underflow[NF_IP_NUMHOOKS];
-	u32			num_counters;
-	compat_uptr_t		counters;	/* struct ipt_counters * */
-	struct ipt_entry	entries[0];
-};
-
-static int do_netfilter_replace(int fd, int level, int optname,
-				char __user *optval, int optlen)
-{
-	struct compat_ipt_replace __user *urepl;
-	struct ipt_replace __user *repl_nat;
-	char name[IPT_TABLE_MAXNAMELEN];
-	u32 origsize, tmp32, num_counters;
-	unsigned int repl_nat_size;
-	int ret;
-	int i;
-	compat_uptr_t ucntrs;
-
-	urepl = (struct compat_ipt_replace __user *)optval;
-	if (get_user(origsize, &urepl->size))
-		return -EFAULT;
-
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (optlen != sizeof(*urepl) + origsize)
-		return -ENOPROTOOPT;
-
-	/* XXX Assumes that size of ipt_entry is the same both in
-	 *     native and compat environments.
-	 */
-	repl_nat_size = sizeof(*repl_nat) + origsize;
-	repl_nat = compat_alloc_user_space(repl_nat_size);
-
-	ret = -EFAULT;
-	if (put_user(origsize, &repl_nat->size))
-		goto out;
-
-	if (!access_ok(VERIFY_READ, urepl, optlen) ||
-	    !access_ok(VERIFY_WRITE, repl_nat, optlen))
-		goto out;
-
-	if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) ||
-	    __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name)))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->valid_hooks) ||
-	    __put_user(tmp32, &repl_nat->valid_hooks))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->num_entries) ||
-	    __put_user(tmp32, &repl_nat->num_entries))
-		goto out;
-
-	if (__get_user(num_counters, &urepl->num_counters) ||
-	    __put_user(num_counters, &repl_nat->num_counters))
-		goto out;
-
-	if (__get_user(ucntrs, &urepl->counters) ||
-	    __put_user(compat_ptr(ucntrs), &repl_nat->counters))
-		goto out;
-
-	if (__copy_in_user(&repl_nat->entries[0],
-			   &urepl->entries[0],
-			   origsize))
-		goto out;
-
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
-		if (__get_user(tmp32, &urepl->hook_entry[i]) ||
-		    __put_user(tmp32, &repl_nat->hook_entry[i]) ||
-		    __get_user(tmp32, &urepl->underflow[i]) ||
-		    __put_user(tmp32, &repl_nat->underflow[i]))
-			goto out;
-	}
-
-	/*
-	 * Since struct ipt_counters just contains two u_int64_t members
-	 * we can just do the access_ok check here and pass the (converted)
-	 * pointer into the standard syscall.  We hope that the pointer is
-	 * not misaligned ...
-	 */
-	if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs),
-		       num_counters * sizeof(struct ipt_counters)))
-		goto out;
-
-
-	ret = sys_setsockopt(fd, level, optname,
-			     (char __user *)repl_nat, repl_nat_size);
-
-out:
-	return ret;
-}
-
-/*
  * A struct sock_filter is architecture independent.
  */
 struct compat_sock_fprog {
@@ -460,10 +359,6 @@ static int do_set_sock_timeout(int fd, i
 asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
 				char __user *optval, int optlen)
 {
-	/* SO_SET_REPLACE seems to be the same in all levels */
-	if (optname == IPT_SO_SET_REPLACE)
-		return do_netfilter_replace(fd, level, optname,
-					    optval, optlen);
 	if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER)
 		return do_set_attach_filter(fd, level, optname,
 					    optval, optlen);
diff -upr linux-2.6.16.46-0.12.orig/net/core/datagram.c linux-2.6.16.46-0.12-027test011/net/core/datagram.c
--- linux-2.6.16.46-0.12.orig/net/core/datagram.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/core/datagram.c	2007-08-28 17:35:30.000000000 +0400
@@ -56,6 +56,8 @@
 #include <net/sock.h>
 #include <net/tcp_states.h>
 
+#include <ub/ub_net.h>
+
 /*
  *	Is a socket 'connection oriented' ?
  */
@@ -493,6 +495,7 @@ unsigned int datagram_poll(struct file *
 {
 	struct sock *sk = sock->sk;
 	unsigned int mask;
+	int no_ubc_space;
 
 	poll_wait(file, sk->sk_sleep, wait);
 	mask = 0;
@@ -500,8 +503,14 @@ unsigned int datagram_poll(struct file *
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
 		mask |= POLLERR;
-	if (sk->sk_shutdown == SHUTDOWN_MASK)
+	if (sk->sk_shutdown == SHUTDOWN_MASK) {
+		no_ubc_space = 0;
 		mask |= POLLHUP;
+	} else {
+		no_ubc_space = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
+		if (no_ubc_space)
+			ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
+	}
 
 	/* readable? */
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
@@ -518,7 +527,7 @@ unsigned int datagram_poll(struct file *
 	}
 
 	/* writable? */
-	if (sock_writeable(sk))
+	if (!no_ubc_space && sock_writeable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
diff -upr linux-2.6.16.46-0.12.orig/net/core/dev.c linux-2.6.16.46-0.12-027test011/net/core/dev.c
--- linux-2.6.16.46-0.12.orig/net/core/dev.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/core/dev.c	2007-08-28 17:35:32.000000000 +0400
@@ -118,6 +118,8 @@
 #include <linux/err.h>
 #include <linux/dmaengine.h>
 #include <linux/audit.h>
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
 
 #ifdef CONFIG_XEN
 #include <net/ip.h>
@@ -182,25 +184,40 @@ static spinlock_t net_dma_event_lock;
  * unregister_netdevice(), which must be called with the rtnl
  * semaphore held.
  */
+#ifdef CONFIG_VE
+#define dev_tail	(get_exec_env()->_net_dev_tail)
+#else
 struct net_device *dev_base;
 static struct net_device **dev_tail = &dev_base;
+EXPORT_SYMBOL(dev_base);
+#endif
 DEFINE_RWLOCK(dev_base_lock);
 
-EXPORT_SYMBOL(dev_base);
 EXPORT_SYMBOL(dev_base_lock);
 
+#ifdef CONFIG_VE
+#define MAX_UNMOVABLE_NETDEVICES (8*4096)
+static uint8_t unmovable_ifindex_list[MAX_UNMOVABLE_NETDEVICES/8];
+static LIST_HEAD(dev_global_list);
+#endif
+
 #define NETDEV_HASHBITS	8
 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
 
-static inline struct hlist_head *dev_name_hash(const char *name)
+struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env)
 {
-	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+	unsigned hash;
+	if (!ve_is_super(env))
+		return visible_dev_head(env);
+	hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 }
 
-static inline struct hlist_head *dev_index_hash(int ifindex)
+struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env)
 {
+	if (!ve_is_super(env))
+		return visible_dev_index_head(env);
 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 }
 
@@ -484,7 +501,7 @@ struct net_device *__dev_get_by_name(con
 {
 	struct hlist_node *p;
 
-	hlist_for_each(p, dev_name_hash(name)) {
+	hlist_for_each(p, dev_name_hash(name, get_exec_env())) {
 		struct net_device *dev
 			= hlist_entry(p, struct net_device, name_hlist);
 		if (!strncmp(dev->name, name, IFNAMSIZ))
@@ -517,6 +534,32 @@ struct net_device *dev_get_by_name(const
 }
 
 /**
+ *	__dev_global_get_by_name - find a device by its name in dev_global_list
+ *	@name: name to find
+ *
+ *	Find an interface by name. Must be called under RTNL semaphore.
+ *	If the name is found a pointer to the device
+ *	is returned. If the name is not found then %NULL is returned. The
+ *	reference counters are not incremented so the caller must be
+ *	careful with locks.
+ */
+
+#ifdef CONFIG_VE
+struct net_device *__dev_global_get_by_name(const char *name)
+{
+	struct net_device *dev;
+	/* It's called relatively rarely */
+	list_for_each_entry(dev, &dev_global_list, dev_global_list_entry) {
+		if (strncmp(dev->name, name, IFNAMSIZ) == 0)
+			return dev;
+	}
+	return NULL;
+}
+#else	/* CONFIG_VE */
+#define __dev_global_get_by_name(name)		__dev_get_by_name(name)
+#endif	/* CONFIG_VE */
+
+/**
  *	__dev_get_by_index - find a device by its ifindex
  *	@ifindex: index of device
  *
@@ -531,7 +574,7 @@ struct net_device *__dev_get_by_index(in
 {
 	struct hlist_node *p;
 
-	hlist_for_each(p, dev_index_hash(ifindex)) {
+	hlist_for_each(p, dev_index_hash(ifindex, get_exec_env())) {
 		struct net_device *dev
 			= hlist_entry(p, struct net_device, index_hlist);
 		if (dev->ifindex == ifindex)
@@ -650,6 +693,23 @@ int dev_valid_name(const char *name)
 		 || strchr(name, '/'));
 }
 
+static inline void __dev_check_name(const char *dev_name, const char *name,
+		long *inuse, const int max_netdevices)
+{
+	int i = 0;
+	char buf[IFNAMSIZ];
+
+	if (!sscanf(dev_name, name, &i))
+		return;
+	if (i < 0 || i >= max_netdevices)
+		return;
+
+	/* avoid cases where sscanf is not exact inverse of printf */
+	snprintf(buf, sizeof(buf), name, i);
+	if (!strncmp(buf, dev_name, IFNAMSIZ))
+		set_bit(i, inuse);
+}
+
 /**
  *	dev_alloc_name - allocate a name for a device
  *	@dev: device
@@ -686,16 +746,20 @@ int dev_alloc_name(struct net_device *de
 		if (!inuse)
 			return -ENOMEM;
 
-		for (d = dev_base; d; d = d->next) {
-			if (!sscanf(d->name, name, &i))
-				continue;
-			if (i < 0 || i >= max_netdevices)
-				continue;
-
-			/*  avoid cases where sscanf is not exact inverse of printf */
-			snprintf(buf, sizeof(buf), name, i);
-			if (!strncmp(buf, d->name, IFNAMSIZ))
-				set_bit(i, inuse);
+#ifdef CONFIG_VE
+		if (ve_is_super(get_exec_env())) {
+			list_for_each_entry(d, &dev_global_list,
+					dev_global_list_entry) {
+				__dev_check_name(d->name, name, inuse,
+						max_netdevices);
+			}
+		} else
+#endif
+		{
+			for (d = dev_base; d; d = d->next) {
+				__dev_check_name(d->name, name, inuse,
+						max_netdevices);
+			}
 		}
 
 		i = find_first_zero_bit(inuse, max_netdevices);
@@ -703,7 +767,11 @@ int dev_alloc_name(struct net_device *de
 	}
 
 	snprintf(buf, sizeof(buf), name, i);
-	if (!__dev_get_by_name(buf)) {
+	if (ve_is_super(get_exec_env()))
+		d = __dev_global_get_by_name(buf);
+	else
+		d = __dev_get_by_name(buf);
+	if (d == NULL) {
 		strlcpy(dev->name, buf, IFNAMSIZ);
 		return i;
 	}
@@ -736,13 +804,14 @@ int dev_change_name(struct net_device *d
 	if (!dev_valid_name(newname))
 		return -EINVAL;
 
+	/* Rename of devices in VE is prohibited by CAP_NET_ADMIN */
 	if (strchr(newname, '%')) {
 		err = dev_alloc_name(dev, newname);
 		if (err < 0)
 			return err;
 		strcpy(newname, dev->name);
 	}
-	else if (__dev_get_by_name(newname))
+	else if (__dev_global_get_by_name(newname))
 		return -EEXIST;
 	else
 	{
@@ -754,7 +823,8 @@ int dev_change_name(struct net_device *d
 	err = class_device_rename(&dev->class_dev, dev->name);
 	if (!err) {
 		hlist_del(&dev->name_hlist);
-		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name,
+					get_exec_env()));
 		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 	}
 
@@ -1014,6 +1084,8 @@ int call_netdevice_notifiers(unsigned lo
 	return notifier_call_chain(&netdev_chain, val, v);
 }
 
+EXPORT_SYMBOL(call_netdevice_notifiers);
+
 /* When > 0 there are consumers of rx skb time stamps */
 static atomic_t netstamp_needed = ATOMIC_INIT(0);
 
@@ -1270,6 +1342,23 @@ static int dev_gso_segment(struct sk_buf
 	return 0;
 }
 
+#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
+int (*br_hard_xmit_hook)(struct sk_buff *skb, struct net_bridge_port *port);
+static __inline__ int bridge_hard_start_xmit(struct sk_buff *skb,
+						struct net_device *dev)
+{
+	struct net_bridge_port *port;
+
+	if (((port = rcu_dereference(dev->br_port)) == NULL) ||
+		(skb->brmark == BR_ALREADY_SEEN))
+		return 0;
+
+	return br_hard_xmit_hook(skb, port);
+}
+#else
+#define bridge_hard_start_xmit(skb, dev)	(0)
+#endif
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	if (likely(!skb->next)) {
@@ -1283,6 +1372,8 @@ int dev_hard_start_xmit(struct sk_buff *
 				goto gso;
 		}
 
+		bridge_hard_start_xmit(skb, dev);
+
 		return dev->hard_start_xmit(skb, dev);
 	}
 
@@ -1293,6 +1384,9 @@ gso:
 
 		skb->next = nskb->next;
 		nskb->next = NULL;
+
+		bridge_hard_start_xmit(skb, dev);
+
 		rc = dev->hard_start_xmit(nskb, dev);
 		if (unlikely(rc)) {
 			nskb->next = skb->next;
@@ -1450,6 +1544,34 @@ gso:
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
+		/*
+		 * XXX this code is broken:
+		 *  1) it is activated for normal devices in VE0,
+		 *  2) it doesn't use API functions like ub_skb_set_charge,
+		 *  3) it isn't allowed to charge skb as UB_OTHERSOCKBUF
+		 *     if its socket is TCP.
+		 */
+#if 0
+		struct user_beancounter *ub;
+
+		ub = netdev_bc(dev)->exec_ub;
+		/* the skb may already be charged if it was transmitted via
+		 * something like a bonding device */
+		if (ub && (skb_bc(skb)->resource == 0)) {
+			unsigned long chargesize;
+			chargesize = skb_charge_fullsize(skb);
+			if (charge_beancounter(ub, UB_OTHERSOCKBUF,
+						chargesize, UB_SOFT)) {
+				rcu_read_unlock();
+				rc = -ENOMEM;
+				goto out_kfree_skb;
+			}
+			skb_bc(skb)->ub = ub;
+			skb_bc(skb)->charged = chargesize;
+			skb_bc(skb)->resource = UB_OTHERSOCKBUF;
+		}
+#endif
+
 		/* Grab device queue */
 		spin_lock(&dev->queue_lock);
 		q = dev->qdisc;
@@ -1735,6 +1857,7 @@ int netif_receive_skb(struct sk_buff *sk
 	struct net_device *orig_dev;
 	int ret = NET_RX_DROP;
 	unsigned short type;
+	struct ve_struct *old_env;
 
 	/* if we've gotten here through NAPI, check netpoll */
 	if (skb->dev->poll && netpoll_rx(skb))
@@ -1753,6 +1876,17 @@ int netif_receive_skb(struct sk_buff *sk
 	skb->h.raw = skb->nh.raw = skb->data;
 	skb->mac_len = skb->nh.raw - skb->mac.raw;
 
+#ifdef CONFIG_VE
+	/*
+	 * The skb may have been allocated in a VE context other than the one
+	 * its device belongs to, so set the correct owner_env.
+	 */
+	skb->owner_env = skb->dev->owner_env;
+	BUG_ON(skb->owner_env == NULL);
+#endif
+
+	old_env = set_exec_env(skb->owner_env);
+
 	pt_prev = NULL;
 
 	rcu_read_lock();
@@ -1831,6 +1965,7 @@ ncls:
 
 out:
 	rcu_read_unlock();
+	(void)set_exec_env(old_env);
 	return ret;
 }
 
@@ -2219,7 +2354,7 @@ static int __init dev_proc_init(void)
 {
 	int rc = -ENOMEM;
 
-	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
+	if (!proc_glob_fops_create("net/dev", S_IRUGO, &dev_seq_fops))
 		goto out;
 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
 		goto out_dev;
@@ -2231,7 +2366,7 @@ out:
 out_softnet:
 	proc_net_remove("softnet_stat");
 out_dev:
-	proc_net_remove("dev");
+	remove_proc_glob_entry("net/dev", NULL);
 	goto out;
 }
 #else
@@ -2296,9 +2431,12 @@ void dev_set_promiscuity(struct net_devi
 		dev->flags &= ~IFF_PROMISC;
 	else
 		dev->flags |= IFF_PROMISC;
+	/* Promiscuous mode means nothing on loopback and point-to-point devices */
+	if (dev->flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
+		return;
 	if (dev->flags != old_flags) {
 		dev_mc_upload(dev);
-		printk(KERN_INFO "device %s %s promiscuous mode\n",
+		ve_printk(VE_LOG, KERN_INFO "device %s %s promiscuous mode\n",
 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
 		       					       "left");
 		audit_log(current->audit_context, GFP_ATOMIC,
@@ -2623,9 +2761,9 @@ int dev_ioctl(unsigned int cmd, void __u
 	 */
 
 	if (cmd == SIOCGIFCONF) {
-		rtnl_shlock();
+		rtnl_lock();
 		ret = dev_ifconf((char __user *) arg);
-		rtnl_shunlock();
+		rtnl_unlock();
 		return ret;
 	}
 	if (cmd == SIOCGIFNAME)
@@ -2716,11 +2854,20 @@ int dev_ioctl(unsigned int cmd, void __u
 		 *	- require strict serialization.
 		 *	- do not return a value
 		 */
+		case SIOCSIFMTU:
+		case SIOCSIFHWADDR:
+			if (!capable(CAP_NET_ADMIN) &&
+			    !capable(CAP_VE_NET_ADMIN))
+				return -EPERM;
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ifsioc(&ifr, cmd);
+			rtnl_unlock();
+			return ret;
+
 		case SIOCSIFFLAGS:
 		case SIOCSIFMETRIC:
-		case SIOCSIFMTU:
 		case SIOCSIFMAP:
-		case SIOCSIFHWADDR:
 		case SIOCSIFSLAVE:
 		case SIOCADDMULTI:
 		case SIOCDELMULTI:
@@ -2800,20 +2947,73 @@ int dev_ioctl(unsigned int cmd, void __u
  *	dev_new_index	-	allocate an ifindex
  *
  *	Returns a suitable unique value for a new device interface
- *	number.  The caller must hold the rtnl semaphore or the
+ *	number. The caller must hold the rtnl semaphore or the
  *	dev_base_lock to be sure it remains unique.
+ *
+ *	Note: dev->name must be valid on entry
  */
-static int dev_new_index(void)
+static int dev_ve_new_index(void)
 {
-	static int ifindex;
+#ifdef CONFIG_VE
+	int *ifindex = &get_exec_env()->ifindex;	/* counter stored in the current exec env */
+	int delta = 2;	/* presumably keeps these indexes odd (global ones are even) - confirm */
+#else
+	static int s_ifindex;	/* single file-wide counter without CONFIG_VE */
+	int *ifindex = &s_ifindex;
+	int delta = 1;
+#endif
 	for (;;) {
-		if (++ifindex <= 0)
-			ifindex = 1;
-		if (!__dev_get_by_index(ifindex))
-			return ifindex;
+		*ifindex += delta;
+		if (*ifindex <= 0)
+			*ifindex = 1;	/* went non-positive: restart from 1 */
+		if (!__dev_get_by_index(*ifindex))	/* loop until no device has this index */
+			return *ifindex;
 	}
 }
 
+#ifdef CONFIG_VE
+/* Take the first free slot of unmovable_ifindex_list; -EMFILE if none. */
+static int dev_glb_new_index(void)
+{
+	int slot = find_first_zero_bit((long*)unmovable_ifindex_list,
+			MAX_UNMOVABLE_NETDEVICES);
+
+	/* No zero bit found: every slot is already taken. */
+	if (slot == MAX_UNMOVABLE_NETDEVICES)
+		return -EMFILE;
+
+	__set_bit(slot, (long*)unmovable_ifindex_list);
+	return (slot + 1) * 2;	/* slot 0, 1, 2, ... -> ifindex 2, 4, 6, ... */
+}
+#endif
+
+/* Release the global ifindex bitmap slot of @dev; no-op without CONFIG_VE. */
+static void dev_glb_free_index(struct net_device *dev)
+{
+#ifdef CONFIG_VE
+	int bit = dev->ifindex / 2 - 1;	/* inverse of (slot + 1) * 2 */
+
+	BUG_ON(bit < 0 || bit >= MAX_UNMOVABLE_NETDEVICES);
+	__clear_bit(bit, (long*)unmovable_ifindex_list);
+#endif
+}
+
+static int dev_new_index(struct net_device *dev)
+{
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
+		return dev_glb_new_index();	/* bitmap slot; may be -EMFILE */
+#endif
+
+	return dev_ve_new_index();	/* counter-based; loops until an unused index is found */
+}
+
+static void dev_free_index(struct net_device *dev)
+{
+	if ((dev->ifindex % 2) == 0)	/* even index: treated as a dev_glb_new_index() slot */
+		dev_glb_free_index(dev);
+}
+
 static int dev_boot_phase = 1;
 
 /* Delayed registration/unregisteration */
@@ -2856,6 +3056,10 @@ int register_netdevice(struct net_device
 	/* When net_device's are persistent, this will be fatal. */
 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
 
+	ret = -EPERM;
+	if (!ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
+		goto out;
+
 	spin_lock_init(&dev->queue_lock);
 	spin_lock_init(&dev->_xmit_lock);
 	dev->xmit_lock_owner = -1;
@@ -2875,27 +3079,32 @@ int register_netdevice(struct net_device
 		if (ret) {
 			if (ret > 0)
 				ret = -EIO;
-			goto out_err;
+			goto out_free_div;
 		}
 	}
  
 	if (!dev_valid_name(dev->name)) {
 		ret = -EINVAL;
-		goto out_err;
+		goto out_free_div;
+	}
+
+	dev->ifindex = dev_new_index(dev);
+	if (dev->ifindex < 0) {
+		ret = dev->ifindex;
+		goto out_free_div;
 	}
 
-	dev->ifindex = dev_new_index();
 	if (dev->iflink == -1)
 		dev->iflink = dev->ifindex;
 
 	/* Check for existence of name */
-	head = dev_name_hash(dev->name);
+	head = dev_name_hash(dev->name, get_exec_env());
 	hlist_for_each(p, head) {
 		struct net_device *d
 			= hlist_entry(p, struct net_device, name_hlist);
 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
 			ret = -EEXIST;
- 			goto out_err;
+ 			goto out_free_ind;
 		}
  	}
 
@@ -2945,12 +3154,20 @@ int register_netdevice(struct net_device
 	set_bit(__LINK_STATE_PRESENT, &dev->state);
 
 	dev->next = NULL;
+	dev->owner_env = get_exec_env();
+	netdev_bc(dev)->owner_ub = get_beancounter(get_exec_ub());
+	netdev_bc(dev)->exec_ub = get_beancounter(get_exec_ub());
 	dev_init_scheduler(dev);
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))
+		list_add_tail(&dev->dev_global_list_entry, &dev_global_list);
+#endif
 	write_lock_bh(&dev_base_lock);
 	*dev_tail = dev;
 	dev_tail = &dev->next;
 	hlist_add_head(&dev->name_hlist, head);
-	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex,
+						get_exec_env()));
 	dev_hold(dev);
 	dev->reg_state = NETREG_REGISTERING;
 	write_unlock_bh(&dev_base_lock);
@@ -2964,7 +3181,9 @@ int register_netdevice(struct net_device
 
 out:
 	return ret;
-out_err:
+out_free_ind:
+	dev_free_index(dev);
+out_free_div:
 	free_divert_blk(dev);
 	goto out;
 }
@@ -3010,6 +3229,10 @@ int register_netdev(struct net_device *d
 	err = register_netdevice(dev);
 out:
 	rtnl_unlock();
+	if (err == 0 && dev->reg_state != NETREG_REGISTERED) {
+		unregister_netdev(dev);
+		err = -ENOMEM;
+	}
 	return err;
 }
 EXPORT_SYMBOL(register_netdev);
@@ -3032,7 +3255,7 @@ static void netdev_wait_allrefs(struct n
 	rebroadcast_time = warning_time = jiffies;
 	while (atomic_read(&dev->refcnt) != 0) {
 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
-			rtnl_shlock();
+			rtnl_lock();
 
 			/* Rebroadcast unregister notification */
 			notifier_call_chain(&netdev_chain,
@@ -3049,7 +3272,7 @@ static void netdev_wait_allrefs(struct n
 				linkwatch_run_queue();
 			}
 
-			rtnl_shunlock();
+			__rtnl_unlock();
 
 			rebroadcast_time = jiffies;
 		}
@@ -3092,6 +3315,7 @@ void netdev_run_todo(void)
 {
 	struct list_head list = LIST_HEAD_INIT(list);
 	int err;
+	struct ve_struct *current_env;
 
 
 	/* Need to guard against multiple cpu's getting out of order. */
@@ -3110,22 +3334,30 @@ void netdev_run_todo(void)
 	list_splice_init(&net_todo_list, &list);
 	spin_unlock(&net_todo_list_lock);
 		
+	current_env = get_exec_env();
 	while (!list_empty(&list)) {
 		struct net_device *dev
 			= list_entry(list.next, struct net_device, todo_list);
 		list_del(&dev->todo_list);
 
+		(void)set_exec_env(dev->owner_env);
 		switch(dev->reg_state) {
 		case NETREG_REGISTERING:
 			dev->reg_state = NETREG_REGISTERED;
 			err = netdev_register_sysfs(dev);
-			if (err)
+			if (err) {
 				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
 				       dev->name, err);
+				dev->reg_state = NETREG_REGISTER_ERR;
+				break;
+			}
 			break;
 
 		case NETREG_UNREGISTERING:
 			netdev_unregister_sysfs(dev);
+			/* fall through */
+
+		case NETREG_REGISTER_ERR:
 			dev->reg_state = NETREG_UNREGISTERED;
 
 			netdev_wait_allrefs(dev);
@@ -3136,6 +3368,10 @@ void netdev_run_todo(void)
 			BUG_TRAP(!dev->ip6_ptr);
 			BUG_TRAP(!dev->dn_ptr);
 
+			put_beancounter(netdev_bc(dev)->exec_ub);
+			put_beancounter(netdev_bc(dev)->owner_ub);
+			netdev_bc(dev)->exec_ub = NULL;
+			netdev_bc(dev)->owner_ub = NULL;
 
 			/* It must be the very last action, 
 			 * after this 'dev' may point to freed up memory.
@@ -3150,6 +3386,7 @@ void netdev_run_todo(void)
 			break;
 		}
 	}
+	(void)set_exec_env(current_env);
 
 out:
 	up(&net_todo_run_mutex);
@@ -3175,7 +3412,7 @@ struct net_device *alloc_netdev(int size
 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
 
-	p = kmalloc(alloc_size, GFP_KERNEL);
+	p = ub_kmalloc(alloc_size, GFP_KERNEL);
 	if (!p) {
 		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
 		return NULL;
@@ -3255,7 +3492,8 @@ int unregister_netdevice(struct net_devi
 		return -ENODEV;
 	}
 
-	BUG_ON(dev->reg_state != NETREG_REGISTERED);
+	BUG_ON(dev->reg_state != NETREG_REGISTERED &&
+	       dev->reg_state != NETREG_REGISTER_ERR);
 
 	/* If device is running, close it first. */
 	if (dev->flags & IFF_UP)
@@ -3271,6 +3509,10 @@ int unregister_netdevice(struct net_devi
 				dev_tail = dp;
 			*dp = d->next;
 			write_unlock_bh(&dev_base_lock);
+#ifdef CONFIG_VE
+			if (ve_is_super(get_exec_env()))
+				list_del(&dev->dev_global_list_entry);
+#endif
 			break;
 		}
 	}
@@ -3280,7 +3522,8 @@ int unregister_netdevice(struct net_devi
 		return -ENODEV;
 	}
 
-	dev->reg_state = NETREG_UNREGISTERING;
+	if (dev->reg_state != NETREG_REGISTER_ERR)
+		dev->reg_state = NETREG_UNREGISTERING;
 
 	synchronize_net();
 
@@ -3304,6 +3547,8 @@ int unregister_netdevice(struct net_devi
 	/* Notifier chain MUST detach us from master device. */
 	BUG_TRAP(!dev->master);
 
+	dev_free_index(dev);
+
 	free_divert_blk(dev);
 
 	/* Finish processing unregister after unlock */
@@ -3544,6 +3789,8 @@ EXPORT_SYMBOL(dev_close);
 EXPORT_SYMBOL(dev_get_by_flags);
 EXPORT_SYMBOL(dev_get_by_index);
 EXPORT_SYMBOL(dev_get_by_name);
+EXPORT_SYMBOL(dev_name_hash);
+EXPORT_SYMBOL(dev_index_hash);
 EXPORT_SYMBOL(dev_open);
 EXPORT_SYMBOL(dev_queue_xmit);
 EXPORT_SYMBOL(dev_remove_pack);
@@ -3572,6 +3819,7 @@ EXPORT_SYMBOL(skb_checksum_setup);
 
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 EXPORT_SYMBOL(br_handle_frame_hook);
+EXPORT_SYMBOL(br_hard_xmit_hook);
 EXPORT_SYMBOL(br_fdb_get_hook);
 EXPORT_SYMBOL(br_fdb_put_hook);
 #endif
diff -upr linux-2.6.16.46-0.12.orig/net/core/dev_mcast.c linux-2.6.16.46-0.12-027test011/net/core/dev_mcast.c
--- linux-2.6.16.46-0.12.orig/net/core/dev_mcast.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/core/dev_mcast.c	2007-08-28 17:35:31.000000000 +0400
@@ -295,9 +295,10 @@ static struct file_operations dev_mc_seq
 
 void __init dev_mcast_init(void)
 {
-	proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
+	proc_glob_fops_create("net/dev_mcast", 0, &dev_mc_seq_fops);	/* result not checked */
 }
 
 EXPORT_SYMBOL(dev_mc_add);
 EXPORT_SYMBOL(dev_mc_delete);
 EXPORT_SYMBOL(dev_mc_upload);
+EXPORT_SYMBOL(dev_mc_discard);
diff -upr linux-2.6.16.46-0.12.orig/net/core/dst.c linux-2.6.16.46-0.12-027test011/net/core/dst.c
--- linux-2.6.16.46-0.12.orig/net/core/dst.c	2007-08-24 19:28:08.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/core/dst.c	2007-08-28 17:35:32.000000000 +0400
@@ -259,11 +259,14 @@ static int dst_dev_event(struct notifier
 	switch (event) {
 	case NETDEV_UNREGISTER:
 	case NETDEV_DOWN:
-		spin_lock_bh(&dst_lock);
+		local_bh_disable();
+		dst_run_gc(0);
+		spin_lock(&dst_lock);
 		for (dst = dst_garbage_list; dst; dst = dst->next) {
 			dst_ifdown(dst, dev, event != NETDEV_DOWN);
 		}
-		spin_unlock_bh(&dst_lock);
+		spin_unlock(&dst_lock);
+		local_bh_enable();
 		break;
 	}
 	return NOTIFY_DONE;
diff -upr linux-2.6.16.46-0.12.orig/net/core/filter.c linux-2.6.16.46-0.12-027test011/net/core/filter.c
--- linux-2.6.16.46-0.12.orig/net/core/filter.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/core/filter.c	2007-08-28 17:35:30.000000000 +0400
@@ -34,6 +34,7 @@
 #include <linux/timer.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <asm/unaligned.h>
 #include <linux/filter.h>
 
 /* No hurry in this branch */
@@ -177,7 +178,7 @@ unsigned int sk_run_filter(struct sk_buf
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
 			if (ptr != NULL) {
-				A = ntohl(*(u32 *)ptr);
+				A = ntohl(get_unaligned((u32 *)ptr));
 				continue;
 			}
 			break;
@@ -186,7 +187,7 @@ load_w:
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
 			if (ptr != NULL) {
-				A = ntohs(*(u16 *)ptr);
+				A = ntohs(get_unaligned((u16 *)ptr));
 				continue;
 			}
 			break;
@@ -406,7 +407,7 @@ int sk_attach_filter(struct sock_fprog *
 	if (fprog->filter == NULL)
 		return -EINVAL;
 
-	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL_UBC);
 	if (!fp)
 		return -ENOMEM;
 	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
diff -upr linux-2.6.16.46-0.12.orig/net/core/link_watch.c linux-2.6.16.46-0.12-027test011/net/core/link_watch.c
--- linux-2.6.16.46-0.12.orig/net/core/link_watch.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/core/link_watch.c	2007-08-28 17:35:30.000000000 +0400
@@ -99,9 +99,9 @@ static void linkwatch_event(void *dummy)
 	linkwatch_nextevent = jiffies + HZ;
 	clear_bit(LW_RUNNING, &linkwatch_flags);
 
-	rtnl_shlock();
+	rtnl_lock();
 	linkwatch_run_queue();
-	rtnl_shunlock();
+	rtnl_unlock();
 }
 
 
diff -upr linux-2.6.16.46-0.12.orig/net/core/neighbour.c linux-2.6.16.46-0.12-027test011/net/core/neighbour.c
--- linux-2.6.16.46-0.12.orig/net/core/neighbour.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/core/neighbour.c	2007-08-28 17:35:32.000000000 +0400
@@ -33,6 +33,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
 #include <linux/string.h>
+#include <ub/beancounter.h>
 
 #define NEIGH_DEBUG 1
 
@@ -242,6 +243,7 @@ static struct neighbour *neigh_alloc(str
 	int entries;
 
 	entries = atomic_inc_return(&tbl->entries) - 1;
+	n = ERR_PTR(-ENOBUFS);
 	if (entries >= tbl->gc_thresh3 ||
 	    (entries >= tbl->gc_thresh2 &&
 	     time_after(now, tbl->last_flush + 5 * HZ))) {
@@ -252,7 +254,7 @@ static struct neighbour *neigh_alloc(str
 
 	n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
 	if (!n)
-		goto out_entries;
+		goto out_nomem;
 
 	memset(n, 0, tbl->entry_size);
 
@@ -273,6 +275,8 @@ static struct neighbour *neigh_alloc(str
 out:
 	return n;
 
+out_nomem:
+	n = ERR_PTR(-ENOMEM);
 out_entries:
 	atomic_dec(&tbl->entries);
 	goto out;
@@ -388,12 +392,11 @@ struct neighbour *neigh_create(struct ne
 	u32 hash_val;
 	int key_len = tbl->key_len;
 	int error;
-	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+	struct neighbour *n1, *rc, *n;
 
-	if (!n) {
-		rc = ERR_PTR(-ENOBUFS);
+	rc = n = neigh_alloc(tbl);
+	if (IS_ERR(n))
 		goto out;
-	}
 
 	memcpy(n->primary_key, pkey, key_len);
 	n->dev = dev;
@@ -639,6 +642,8 @@ static void neigh_periodic_timer(unsigne
 	struct neigh_table *tbl = (struct neigh_table *)arg;
 	struct neighbour *n, **np;
 	unsigned long expire, now = jiffies;
+	struct ve_struct *env = set_exec_env(tbl->owner_env);
+	struct user_beancounter *ub = set_exec_ub(tbl->owner_ub);
 
 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
 
@@ -700,6 +705,8 @@ next_elt:
  	mod_timer(&tbl->gc_timer, now + expire);
 
 	write_unlock(&tbl->lock);
+	set_exec_ub(ub);
+	set_exec_env(env);
 }
 
 static __inline__ int neigh_max_probes(struct neighbour *n)
@@ -727,6 +734,11 @@ static void neigh_timer_handler(unsigned
 	struct neighbour *neigh = (struct neighbour *)arg;
 	unsigned state;
 	int notify = 0;
+	struct ve_struct *env;
+	struct user_beancounter *ub;
+
+	env = set_exec_env(neigh->dev->owner_env);
+	ub = set_exec_ub(netdev_bc(neigh->dev)->exec_ub);
 
 	write_lock(&neigh->lock);
 
@@ -824,6 +836,8 @@ out:
 		neigh_app_notify(neigh);
 #endif
 	neigh_release(neigh);
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(env);
 }
 
 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
@@ -1202,6 +1216,9 @@ static void neigh_proxy_process(unsigned
 	unsigned long now = jiffies;
 	struct sk_buff *skb;
 
+	struct ve_struct *env = set_exec_env(tbl->owner_env);
+	struct user_beancounter *ub = set_exec_ub(tbl->owner_ub);
+
 	spin_lock(&tbl->proxy_queue.lock);
 
 	skb = tbl->proxy_queue.next;
@@ -1213,6 +1230,7 @@ static void neigh_proxy_process(unsigned
 		skb = skb->next;
 		if (tdif <= 0) {
 			struct net_device *dev = back->dev;
+
 			__skb_unlink(back, &tbl->proxy_queue);
 			if (tbl->proxy_redo && netif_running(dev))
 				tbl->proxy_redo(back);
@@ -1220,6 +1238,7 @@ static void neigh_proxy_process(unsigned
 				kfree_skb(back);
 
 			dev_put(dev);
+
 		} else if (!sched_next || tdif < sched_next)
 			sched_next = tdif;
 	}
@@ -1227,6 +1246,8 @@ static void neigh_proxy_process(unsigned
 	if (sched_next)
 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
 	spin_unlock(&tbl->proxy_queue.lock);
+	(void)set_exec_ub(ub);
+	(void)set_exec_env(env);
 }
 
 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
@@ -1323,12 +1344,15 @@ void neigh_parms_destroy(struct neigh_pa
 }
 
 
-void neigh_table_init(struct neigh_table *tbl)
+int neigh_table_init(struct neigh_table *tbl)
 {
 	unsigned long now = jiffies;
 	unsigned long phsize;
 
 	atomic_set(&tbl->parms.refcnt, 1);
+	atomic_set(&tbl->entries, 0);
+	tbl->hash_chain_gc = 0;
+	tbl->parms.next = NULL;
 	INIT_RCU_HEAD(&tbl->parms.rcu_head);
 	tbl->parms.reachable_time =
 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
@@ -1336,22 +1360,30 @@ void neigh_table_init(struct neigh_table
 	if (!tbl->kmem_cachep)
 		tbl->kmem_cachep = kmem_cache_create(tbl->id,
 						     tbl->entry_size,
-						     0, SLAB_HWCACHE_ALIGN,
+						     0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						     NULL, NULL);
 
 	if (!tbl->kmem_cachep)
-		panic("cannot create neighbour cache");
+		return -ENOMEM;
+
+	tbl->owner_env = get_ve(get_exec_env());
+	tbl->owner_ub = get_beancounter(get_exec_ub());
 
 	tbl->stats = alloc_percpu(struct neigh_statistics);
 	if (!tbl->stats)
-		panic("cannot create neighbour cache statistics");
+		goto out;
 	
 #ifdef CONFIG_PROC_FS
-	tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
-	if (!tbl->pde) 
-		panic("cannot create neighbour proc dir entry");
-	tbl->pde->proc_fops = &neigh_stat_seq_fops;
-	tbl->pde->data = tbl;
+	if (ve_is_super(get_exec_env())) {
+		char name[strlen(tbl->id) + sizeof("net/stat/")];
+		strcpy(name, "net/stat/");
+		strcat(name, tbl->id);
+		tbl->pde = create_proc_glob_entry(name, S_IRUGO, NULL);
+		if (tbl->pde) {
+			tbl->pde->proc_fops = &neigh_stat_seq_fops;
+			tbl->pde->data = tbl;
+		}
+	}
 #endif
 
 	tbl->hash_mask = 1;
@@ -1361,7 +1393,7 @@ void neigh_table_init(struct neigh_table
 	tbl->phash_buckets = kmalloc(phsize, GFP_KERNEL);
 
 	if (!tbl->hash_buckets || !tbl->phash_buckets)
-		panic("cannot allocate neighbour cache hashes");
+		goto nomem;
 
 	memset(tbl->phash_buckets, 0, phsize);
 
@@ -1385,6 +1417,25 @@ void neigh_table_init(struct neigh_table
 	tbl->next	= neigh_tables;
 	neigh_tables	= tbl;
 	write_unlock(&neigh_tbl_lock);
+	return 0;
+
+nomem:
+	if (tbl->hash_buckets) {
+		neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
+		tbl->hash_buckets = NULL;
+	}
+	if (tbl->phash_buckets) {
+		kfree(tbl->phash_buckets);
+		tbl->phash_buckets = NULL;
+	}
+	if (tbl->stats) {
+		free_percpu(tbl->stats);
+		tbl->stats = NULL;
+	}
+out:
+	put_beancounter(tbl->owner_ub);
+	put_ve(tbl->owner_env);
+	return -ENOMEM;
 }
 
 int neigh_table_clear(struct neigh_table *tbl)
@@ -1398,6 +1449,15 @@ int neigh_table_clear(struct neigh_table
 	neigh_ifdown(tbl, NULL);
 	if (atomic_read(&tbl->entries))
 		printk(KERN_CRIT "neighbour leakage\n");
+#ifdef CONFIG_PROC_FS
+	if (ve_is_super(get_exec_env())) {
+		char name[strlen(tbl->id) + sizeof("net/stat/")];
+		strcpy(name, "net/stat/");
+		strcat(name, tbl->id);
+		remove_proc_glob_entry(name, NULL);
+	}
+#endif
+
 	write_lock(&neigh_tbl_lock);
 	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
 		if (*tp == tbl) {
@@ -1413,6 +1473,9 @@ int neigh_table_clear(struct neigh_table
 	kfree(tbl->phash_buckets);
 	tbl->phash_buckets = NULL;
 
+	put_beancounter(tbl->owner_ub);
+	put_ve(tbl->owner_env);
+
 	return 0;
 }
 
@@ -1435,6 +1498,8 @@ int neigh_delete(struct sk_buff *skb, st
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		read_unlock(&neigh_tbl_lock);
 
 		err = -EINVAL;
@@ -1488,6 +1553,8 @@ int neigh_add(struct sk_buff *skb, struc
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		read_unlock(&neigh_tbl_lock);
 
 		err = -EINVAL;
@@ -1720,6 +1787,9 @@ int neightbl_set(struct sk_buff *skb, st
 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
 			continue;
 
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
+
 		if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
 			break;
 	}
@@ -1941,6 +2011,8 @@ int neigh_dump_info(struct sk_buff *skb,
 	s_t = cb->args[0];
 
 	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
+		if (!ve_accessible_strict(tbl->owner_env, get_exec_env()))
+			continue;
 		if (t < s_t || (family && tbl->family != family))
 			continue;
 		if (t > s_t)
@@ -2530,11 +2602,12 @@ int neigh_sysctl_register(struct net_dev
 			  int p_id, int pdev_id, char *p_name, 
 			  proc_handler *handler, ctl_handler *strategy)
 {
-	struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	struct neigh_sysctl_table *t;
 	const char *dev_name_source = NULL;
 	char *dev_name = NULL;
 	int err = 0;
 
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (!t)
 		return -ENOBUFS;
 	memcpy(t, &neigh_sysctl_template, sizeof(*t));
diff -upr linux-2.6.16.46-0.12.orig/net/core/net-sysfs.c linux-2.6.16.46-0.12-027test011/net/core/net-sysfs.c
--- linux-2.6.16.46-0.12.orig/net/core/net-sysfs.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/core/net-sysfs.c	2007-08-28 17:35:31.000000000 +0400
@@ -388,12 +388,13 @@ static void netdev_release(struct class_
 	struct net_device *dev 
 		= container_of(cd, struct net_device, class_dev);
 
-	BUG_ON(dev->reg_state != NETREG_RELEASED);
+	BUG_ON(dev->reg_state != NETREG_RELEASED &&
+	       dev->reg_state != NETREG_REGISTERING);
 
 	kfree((char *)dev - dev->padded);
 }
 
-static struct class net_class = {
+struct class net_class = {
 	.name = "net",
 	.release = netdev_release,
 	.class_dev_attrs = net_class_attributes,
@@ -401,6 +402,13 @@ static struct class net_class = {
 	.uevent = netdev_uevent,
 #endif
 };
+EXPORT_SYMBOL(net_class);
+
+#ifndef CONFIG_VE
+#define visible_net_class net_class	/* no VE support: the one global class */
+#else
+#define visible_net_class (*get_exec_env()->net_class)	/* class pointed to by the current exec env */
+#endif
 
 void netdev_unregister_sysfs(struct net_device * net)
 {
@@ -424,7 +432,7 @@ int netdev_register_sysfs(struct net_dev
 	struct class_device *class_dev = &(net->class_dev);
 	int ret;
 
-	class_dev->class = &net_class;
+	class_dev->class = &visible_net_class;
 	class_dev->class_data = net;
 
 	strlcpy(class_dev->class_id, net->name, BUS_ID_SIZE);
@@ -453,12 +461,21 @@ out_cleanup:
 out_unreg:
 	printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n",
 	       net->name, ret);
-	class_device_unregister(class_dev);
+	/* put is called in free_netdev() */
+	class_device_del(class_dev);
 out:
 	return ret;
 }
 
+void prepare_sysfs_netdev(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->net_class = &net_class;	/* point VE0 at the global net_class */
+#endif
+}
+
 int netdev_sysfs_init(void)
 {
+	prepare_sysfs_netdev();	/* set VE0's net_class pointer before registering the class */
 	return class_register(&net_class);
 }
diff -upr linux-2.6.16.46-0.12.orig/net/core/netpoll.c linux-2.6.16.46-0.12-027test011/net/core/netpoll.c
--- linux-2.6.16.46-0.12.orig/net/core/netpoll.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/core/netpoll.c	2007-08-28 17:35:30.000000000 +0400
@@ -666,14 +666,14 @@ int netpoll_setup(struct netpoll *np)
 		printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
 		       np->name, np->dev_name);
 
-		rtnl_shlock();
+		rtnl_lock();
 		if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) {
 			printk(KERN_ERR "%s: failed to open %s\n",
 			       np->name, np->dev_name);
-			rtnl_shunlock();
+			rtnl_unlock();
 			goto release;
 		}
-		rtnl_shunlock();
+		rtnl_unlock();
 
 		atleast = jiffies + HZ/10;
  		atmost = jiffies + 4*HZ;
diff -upr linux-2.6.16.46-0.12.orig/net/core/rtnetlink.c linux-2.6.16.46-0.12-027test011/net/core/rtnetlink.c
--- linux-2.6.16.46-0.12.orig/net/core/rtnetlink.c	2007-08-24 19:28:19.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/core/rtnetlink.c	2007-08-28 17:35:32.000000000 +0400
@@ -35,6 +35,7 @@
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/security.h>
+#include <linux/mutex.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -51,25 +52,31 @@
 #include <net/pkt_sched.h>
 #include <net/netlink.h>
 
-DECLARE_MUTEX(rtnl_sem);
+static DEFINE_MUTEX(rtnl_mutex);
 
 void rtnl_lock(void)
 {
-	rtnl_shlock();
+	mutex_lock(&rtnl_mutex);	/* RTNL is now a mutex private to this file */
 }
 
-int rtnl_lock_interruptible(void)
+void __rtnl_unlock(void)
 {
-	return down_interruptible(&rtnl_sem);
+	mutex_unlock(&rtnl_mutex);	/* bare unlock: unlike rtnl_unlock(), no netdev_run_todo() */
 }
- 
+
 void rtnl_unlock(void)
 {
-	rtnl_shunlock();
-
+	mutex_unlock(&rtnl_mutex);
+	if (rtnl && rtnl->sk_receive_queue.qlen)	/* rtnl socket exists and has queued skbs */
+		rtnl->sk_data_ready(rtnl, 0);	/* invoke its data-ready callback */
 	netdev_run_todo();
 }
 
+int rtnl_trylock(void)
+{
+	return mutex_trylock(&rtnl_mutex);	/* mutex_trylock(): 1 if acquired, 0 on contention */
+}
+
 int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
 {
 	memset(tb, 0, sizeof(struct rtattr*)*maxattr);
@@ -447,6 +454,8 @@ static int rtnetlink_dump_all(struct sk_
 		if (rtnetlink_links[idx] == NULL ||
 		    rtnetlink_links[idx][type].dumpit == NULL)
 			continue;
+		if (vz_security_family_check(idx))
+			continue;
 		if (idx > s_idx)
 			memset(&cb->args[0], 0, sizeof(cb->args));
 		if (rtnetlink_links[idx][type].dumpit(skb, cb))
@@ -514,7 +523,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s
 		return 0;
 
 	family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
-	if (family >= NPROTO) {
+	if (family >= NPROTO || vz_security_family_check(family)) {
 		*errp = -EAFNOSUPPORT;
 		return -1;
 	}
@@ -588,9 +597,9 @@ static void rtnetlink_rcv(struct sock *s
 	unsigned int qlen = 0;
 
 	do {
-		rtnl_lock();
+		mutex_lock(&rtnl_mutex);
 		netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg);
-		up(&rtnl_sem);
+		mutex_unlock(&rtnl_mutex);
 
 		netdev_run_todo();
 	} while (qlen);
@@ -667,6 +676,5 @@ EXPORT_SYMBOL(rtnetlink_links);
 EXPORT_SYMBOL(rtnetlink_put_metrics);
 EXPORT_SYMBOL(rtnl);
 EXPORT_SYMBOL(rtnl_lock);
-EXPORT_SYMBOL(rtnl_lock_interruptible);
-EXPORT_SYMBOL(rtnl_sem);
+EXPORT_SYMBOL(rtnl_trylock);
 EXPORT_SYMBOL(rtnl_unlock);
diff -upr linux-2.6.16.46-0.12.orig/net/core/scm.c linux-2.6.16.46-0.12-027test011/net/core/scm.c
--- linux-2.6.16.46-0.12.orig/net/core/scm.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/core/scm.c	2007-08-28 17:35:32.000000000 +0400
@@ -34,6 +34,7 @@
 #include <net/compat.h>
 #include <net/scm.h>
 
+#include <ub/ub_mem.h>
 
 /*
  *	Only allow a user to send credentials, that they could set with 
@@ -42,7 +43,9 @@
 
 static __inline__ int scm_check_creds(struct ucred *creds)
 {
-	if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
+	if ((creds->pid == virt_tgid(current) ||
+	     creds->pid == current->tgid ||
+	     capable(CAP_VE_SYS_ADMIN)) &&
 	    ((creds->uid == current->uid || creds->uid == current->euid ||
 	      creds->uid == current->suid) || capable(CAP_SETUID)) &&
 	    ((creds->gid == current->gid || creds->gid == current->egid ||
@@ -69,7 +72,7 @@ static int scm_fp_copy(struct cmsghdr *c
 
 	if (!fpl)
 	{
-		fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+		fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
 		if (!fpl)
 			return -ENOMEM;
 		*fplp = fpl;
@@ -275,7 +278,7 @@ struct scm_fp_list *scm_fp_dup(struct sc
 	if (!fpl)
 		return NULL;
 
-	new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+	new_fpl = ub_kmalloc(sizeof(*fpl), GFP_KERNEL);
 	if (new_fpl) {
 		for (i=fpl->count-1; i>=0; i--)
 			get_file(fpl->fp[i]);
diff -upr linux-2.6.16.46-0.12.orig/net/core/skbuff.c linux-2.6.16.46-0.12-027test011/net/core/skbuff.c
--- linux-2.6.16.46-0.12.orig/net/core/skbuff.c	2007-08-24 19:28:37.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/core/skbuff.c	2007-08-28 17:35:32.000000000 +0400
@@ -48,6 +48,7 @@
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/netdevice.h>
 #ifdef CONFIG_NET_CLS_ACT
 #include <net/pkt_sched.h>
@@ -68,6 +69,8 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
+#include <ub/ub_net.h>
+
 static kmem_cache_t *skbuff_head_cache __read_mostly;
 static kmem_cache_t *skbuff_fclone_cache __read_mostly;
 
@@ -148,6 +151,9 @@ struct sk_buff *__alloc_skb(unsigned int
 	if (!skb)
 		goto out;
 
+	if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA))
+		goto noubc;
+
 	/* Get the DATA. Size must match skb_add_mtu(). */
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
@@ -161,6 +167,7 @@ struct sk_buff *__alloc_skb(unsigned int
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	skb->owner_env = get_exec_env();
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
@@ -183,6 +190,8 @@ struct sk_buff *__alloc_skb(unsigned int
 out:
 	return skb;
 nodata:
+	ub_skb_free_bc(skb);
+noubc:
 	kmem_cache_free(cache, skb);
 	skb = NULL;
 	goto out;
@@ -220,6 +229,9 @@ struct sk_buff *alloc_skb_from_cache(kme
 	if (!skb)
 		goto out;
 
+	if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA))
+		goto noubc;
+
 	/* Get the DATA. */
 	size = SKB_DATA_ALIGN(size);
 	data = kmem_cache_alloc(cp, gfp_mask);
@@ -233,6 +245,7 @@ struct sk_buff *alloc_skb_from_cache(kme
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	skb->owner_env = get_exec_env();
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
@@ -255,6 +268,8 @@ struct sk_buff *alloc_skb_from_cache(kme
 out:
 	return skb;
 nodata:
+	ub_skb_free_bc(skb);
+noubc:
 	kmem_cache_free(cache, skb);
 	skb = NULL;
 	goto out;
@@ -314,6 +329,7 @@ void kfree_skbmem(struct sk_buff *skb)
 	atomic_t *fclone_ref;
 
 	skb_release_data(skb);
+	ub_skb_free_bc(skb);
 	switch (skb->fclone) {
 	case SKB_FCLONE_UNAVAILABLE:
 		kmem_cache_free(skbuff_head_cache, skb);
@@ -355,6 +371,7 @@ void __kfree_skb(struct sk_buff *skb)
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
 #endif
+	ub_skb_uncharge(skb);
 	if (skb->destructor) {
 		WARN_ON(in_irq());
 		skb->destructor(skb);
@@ -410,6 +427,11 @@ struct sk_buff *skb_clone(struct sk_buff
 		n->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
 
+	if (ub_skb_alloc_bc(n, gfp_mask)) {	/* non-zero return: drop the clone and fail */
+		kmem_cache_free(skbuff_head_cache, n);	/* NOTE(review): assumes n was allocated from skbuff_head_cache - confirm for every path that sets n above */
+		return NULL;
+	}
+
 #define C(x) n->x = skb->x
 
 	n->next = n->prev = NULL;
@@ -444,6 +466,7 @@ struct sk_buff *skb_clone(struct sk_buff
 	C(ipvs_property);
 #endif
 	C(protocol);
+	C(owner_env);
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	C(nfmark);
@@ -454,6 +477,9 @@ struct sk_buff *skb_clone(struct sk_buff
 	C(nfct_reasm);
 	nf_conntrack_get_reasm(skb->nfct_reasm);
 #endif
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+	C(brmark);
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	C(nf_bridge);
 	nf_bridge_get(skb->nf_bridge);
@@ -469,12 +495,17 @@ struct sk_buff *skb_clone(struct sk_buff
 #endif
 
 #endif
+	skb_copy_brmark(n, skb);
 	C(truesize);
 	atomic_set(&n->users, 1);
 	C(head);
 	C(data);
 	C(tail);
 	C(end);
+#ifndef CONFIG_XEN
+	C(accounted);
+	C(redirected);
+#endif
 
 	atomic_inc(&(skb_shinfo(skb)->dataref));
 	skb->cloned = 1;
@@ -529,6 +560,10 @@ static void copy_skb_header(struct sk_bu
 #endif
 	new->tc_index	= old->tc_index;
 #endif
+#ifndef CONFIG_XEN
+	new->accounted = old->accounted;
+	new->redirected = old->redirected;
+#endif
 	atomic_set(&new->users, 1);
 	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
 	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
diff -upr linux-2.6.16.46-0.12.orig/net/core/sock.c linux-2.6.16.46-0.12-027test011/net/core/sock.c
--- linux-2.6.16.46-0.12.orig/net/core/sock.c	2007-08-24 19:28:19.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/core/sock.c	2007-08-28 17:35:36.000000000 +0400
@@ -108,6 +108,7 @@
 #include <linux/net.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/kmem_cache.h>
 #include <linux/interrupt.h>
 #include <linux/poll.h>
 #include <linux/tcp.h>
@@ -124,6 +125,9 @@
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
 
+#include <ub/ub_net.h>
+#include <ub/beancounter.h>
+
 #include <linux/filter.h>
 
 #ifdef CONFIG_INET
@@ -157,7 +161,20 @@ static int sock_set_timeout(long *timeo_
 		return -EINVAL;
 	if (copy_from_user(&tv, optval, sizeof(tv)))
 		return -EFAULT;
+	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
+		return -EDOM;
+
+	if (tv.tv_sec < 0) {
+		static int warned;
 
+		*timeo_p = 0;
+		if (warned < 10 && net_ratelimit()) {	/* braces: the message is limited too, not just the counter */
+			warned++;
+			ve_printk(VE_LOG, KERN_INFO "sock_set_timeout: `%s' (pid %d) tries to set negative timeout\n",
+				 current->comm, current->pid);
+		}
+		return 0;
+	}
 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
 		return 0;
@@ -172,7 +189,7 @@ static void sock_warn_obsolete_bsdism(co
 	static char warncomm[TASK_COMM_LEN];
 	if (strcmp(warncomm, current->comm) && warned < 5) { 
 		strcpy(warncomm,  current->comm); 
-		printk(KERN_WARNING "process `%s' is using obsolete "
+		ve_printk(VE_LOG, KERN_WARNING "process `%s' is using obsolete "
 		       "%s SO_BSDCOMPAT\n", warncomm, name);
 		warned++;
 	}
@@ -660,6 +677,7 @@ struct sock *sk_alloc(int family, gfp_t 
 			 */
 			sk->sk_prot = sk->sk_prot_creator = prot;
 			sock_lock_init(sk);
+			sk->owner_env = get_exec_env();
 		}
 		
 		if (security_sk_alloc(sk, family, priority))
@@ -699,6 +717,7 @@ void sk_free(struct sock *sk)
 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
 
 	security_sk_free(sk);
+	ub_sock_uncharge(sk);
 	if (sk->sk_prot_creator->slab != NULL)
 		kmem_cache_free(sk->sk_prot_creator->slab, sk);
 	else
@@ -746,14 +765,11 @@ struct sock *sk_clone(const struct sock 
 		if (filter != NULL)
 			sk_filter_charge(newsk, filter);
 
-		if (unlikely(xfrm_sk_clone_policy(newsk))) {
-			/* It is still raw copy of parent, so invalidate
-			 * destructor and make plain sk_free() */
-			newsk->sk_destruct = NULL;
-			sk_free(newsk);
-			newsk = NULL;
-			goto out;
-		}
+		if (ub_sock_charge(newsk, newsk->sk_family, newsk->sk_type) < 0)
+			goto out_err;
+
+		if (unlikely(xfrm_sk_clone_policy(newsk)))
+			 goto out_err;
 
 		newsk->sk_err	   = 0;
 		newsk->sk_priority = 0;
@@ -777,8 +793,15 @@ struct sock *sk_clone(const struct sock 
 		if (newsk->sk_prot->sockets_allocated)
 			atomic_inc(newsk->sk_prot->sockets_allocated);
 	}
-out:
 	return newsk;
+
+out_err:
+	/* newsk is still a raw copy of the parent, so invalidate
+	 * the destructor and do a plain sk_free() */
+	sock_reset_flag(newsk, SOCK_TIMESTAMP);
+	newsk->sk_destruct = NULL;
+	sk_free(newsk);
+	return NULL;
 }
 
 EXPORT_SYMBOL_GPL(sk_clone);
@@ -938,11 +961,9 @@ static long sock_wait_for_wmem(struct so
 /*
  *	Generic send/receive buffer handlers
  */
-
-static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
-					    unsigned long header_len,
-					    unsigned long data_len,
-					    int noblock, int *errcode)
+struct sk_buff *sock_alloc_send_skb2(struct sock *sk, unsigned long size,
+				     unsigned long size2, int noblock,
+				     int *errcode)
 {
 	struct sk_buff *skb;
 	gfp_t gfp_mask;
@@ -963,46 +984,35 @@ static struct sk_buff *sock_alloc_send_p
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			goto failure;
 
-		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
-			skb = alloc_skb(header_len, gfp_mask);
-			if (skb) {
-				int npages;
-				int i;
-
-				/* No pages, we're done... */
-				if (!data_len)
-					break;
-
-				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-				skb->truesize += data_len;
-				skb_shinfo(skb)->nr_frags = npages;
-				for (i = 0; i < npages; i++) {
-					struct page *page;
-					skb_frag_t *frag;
-
-					page = alloc_pages(sk->sk_allocation, 0);
-					if (!page) {
-						err = -ENOBUFS;
-						skb_shinfo(skb)->nr_frags = i;
-						kfree_skb(skb);
-						goto failure;
-					}
-
-					frag = &skb_shinfo(skb)->frags[i];
-					frag->page = page;
-					frag->page_offset = 0;
-					frag->size = (data_len >= PAGE_SIZE ?
-						      PAGE_SIZE :
-						      data_len);
-					data_len -= PAGE_SIZE;
-				}
+		if (ub_sock_getwres_other(sk, skb_charge_size(size))) {
+			if (size2 < size) {
+				size = size2;
+				continue;
+			}
+			set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+			err = -EAGAIN;
+			if (!timeo)
+				goto failure;
+			if (signal_pending(current))
+				goto interrupted;
+			timeo = ub_sock_wait_for_space(sk, timeo,
+					skb_charge_size(size));
+			continue;
+		}
 
+		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+			skb = alloc_skb(size, gfp_mask);
+			if (skb)
 				/* Full success... */
 				break;
-			}
+			ub_sock_retwres_other(sk, skb_charge_size(size),
+					SOCK_MIN_UBCSPACE_CH);
 			err = -ENOBUFS;
 			goto failure;
 		}
+		ub_sock_retwres_other(sk,
+				skb_charge_size(size),
+				SOCK_MIN_UBCSPACE_CH);
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		err = -EAGAIN;
@@ -1013,6 +1023,7 @@ static struct sk_buff *sock_alloc_send_p
 		timeo = sock_wait_for_wmem(sk, timeo);
 	}
 
+	ub_skb_set_charge(skb, sk, skb_charge_size(size), UB_OTHERSOCKBUF);
 	skb_set_owner_w(skb, sk);
 	return skb;
 
@@ -1023,10 +1034,12 @@ failure:
 	return NULL;
 }
 
+EXPORT_SYMBOL(sock_alloc_send_skb2);
+
 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 
 				    int noblock, int *errcode)
 {
-	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
+	return sock_alloc_send_skb2(sk, size, size, noblock, errcode);	/* size2 == size: no smaller fallback size is offered */
 }
 
 static void __lock_sock(struct sock *sk)
@@ -1469,7 +1482,8 @@ int proto_register(struct proto *prot, i
 
 	if (alloc_slab) {
 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
-					       SLAB_HWCACHE_ALIGN, NULL, NULL);
+					       SLAB_HWCACHE_ALIGN | SLAB_UBC,
+					       NULL, NULL);
 
 		if (prot->slab == NULL) {
 			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
@@ -1485,9 +1499,11 @@ int proto_register(struct proto *prot, i
 				goto out_free_sock_slab;
 
 			sprintf(request_sock_slab_name, mask, prot->name);
-			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
-								 prot->rsk_prot->obj_size, 0,
-								 SLAB_HWCACHE_ALIGN, NULL, NULL);
+			prot->rsk_prot->slab =
+				kmem_cache_create(request_sock_slab_name,
+						prot->rsk_prot->obj_size, 0,
+						SLAB_HWCACHE_ALIGN | SLAB_UBC,
+						NULL, NULL);
 
 			if (prot->rsk_prot->slab == NULL) {
 				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
@@ -1508,7 +1524,7 @@ int proto_register(struct proto *prot, i
 			prot->twsk_prot->twsk_slab =
 				kmem_cache_create(timewait_sock_slab_name,
 						  prot->twsk_prot->twsk_obj_size,
-						  0, SLAB_HWCACHE_ALIGN,
+						  0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 						  NULL, NULL);
 			if (prot->twsk_prot->twsk_slab == NULL)
 				goto out_free_timewait_sock_slab_name;
diff -upr linux-2.6.16.46-0.12.orig/net/core/stream.c linux-2.6.16.46-0.12-027test011/net/core/stream.c
--- linux-2.6.16.46-0.12.orig/net/core/stream.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/core/stream.c	2007-08-28 17:35:30.000000000 +0400
@@ -111,8 +111,10 @@ EXPORT_SYMBOL(sk_stream_wait_close);
  * sk_stream_wait_memory - Wait for more memory for a socket
  * @sk: socket to wait for memory
  * @timeo_p: for how long
+ * @amount: amount of memory to wait for (in UB space!)
  */
-int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+int __sk_stream_wait_memory(struct sock *sk, long *timeo_p,
+		unsigned long amount)
 {
 	int err = 0;
 	long vm_wait = 0;
@@ -134,8 +136,11 @@ int sk_stream_wait_memory(struct sock *s
 		if (signal_pending(current))
 			goto do_interrupted;
 		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-		if (sk_stream_memory_free(sk) && !vm_wait)
-			break;
+		if (amount == 0) {
+			if (sk_stream_memory_free(sk) && !vm_wait)
+				break;
+		} else
+			ub_sock_sndqueueadd_tcp(sk, amount);
 
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		sk->sk_write_pending++;
@@ -144,6 +149,8 @@ int sk_stream_wait_memory(struct sock *s
 						  sk_stream_memory_free(sk) &&
 						  vm_wait);
 		sk->sk_write_pending--;
+		if (amount > 0)
+			ub_sock_sndqueuedel(sk);
 
 		if (vm_wait) {
 			vm_wait -= current_timeo;
@@ -170,6 +177,10 @@ do_interrupted:
 	goto out;
 }
 
+int sk_stream_wait_memory(struct sock *sk, long *timeo_p)	/* original entry point, kept as a wrapper */
+{
+	return __sk_stream_wait_memory(sk, timeo_p, 0);	/* amount 0 selects the sk_stream_memory_free() check */
+}
 EXPORT_SYMBOL(sk_stream_wait_memory);
 
 void sk_stream_rfree(struct sk_buff *skb)
diff -upr linux-2.6.16.46-0.12.orig/net/dccp/minisocks.c linux-2.6.16.46-0.12-027test011/net/dccp/minisocks.c
--- linux-2.6.16.46-0.12.orig/net/dccp/minisocks.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/dccp/minisocks.c	2007-08-28 17:35:30.000000000 +0400
@@ -19,6 +19,8 @@
 #include <net/xfrm.h>
 #include <net/inet_timewait_sock.h>
 
+#include <ub/ub_orphan.h>
+
 #include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
@@ -46,7 +48,8 @@ void dccp_time_wait(struct sock *sk, int
 {
 	struct inet_timewait_sock *tw = NULL;
 
-	if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
+	if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets &&
+			ub_timewait_check(sk, &dccp_death_row))
 		tw = inet_twsk_alloc(sk, state);
 
 	if (tw != NULL) {
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/af_inet.c linux-2.6.16.46-0.12-027test011/net/ipv4/af_inet.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/af_inet.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/af_inet.c	2007-08-28 17:35:32.000000000 +0400
@@ -115,6 +115,7 @@
 #ifdef CONFIG_IP_MROUTE
 #include <linux/mroute.h>
 #endif
+#include <ub/ub_net.h>
 
 DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
 
@@ -282,6 +283,10 @@ lookup_protocol:
 			goto out_rcu_unlock;
 	}
 
+	err = vz_security_protocol_check(answer->protocol);
+	if (err < 0)
+		goto out_rcu_unlock;
+
 	err = -EPERM;
 	if (answer->capability > 0 && !capable(answer->capability))
 		goto out_rcu_unlock;
@@ -299,6 +304,13 @@ lookup_protocol:
 	if (sk == NULL)
 		goto out;
 
+	err = -ENOBUFS;
+	if (ub_sock_charge(sk, PF_INET, sock->type))
+		goto out_sk_free;
+	/* If the charge succeeded, sock_init_data() MUST be called to set
+	 * sk->sk_type; otherwise sk will be uncharged from the wrong resource.
+	 */
+
 	err = 0;
 	sk->sk_no_check = answer_no_check;
 	if (INET_PROTOSW_REUSE & answer_flags)
@@ -356,6 +368,9 @@ out:
 out_rcu_unlock:
 	rcu_read_unlock();
 	goto out;
+out_sk_free:
+	sk_free(sk);
+	return err;
 }
 
 
@@ -370,6 +385,9 @@ int inet_release(struct socket *sock)
 
 	if (sk) {
 		long timeout;
+		struct ve_struct *saved_env;
+
+		saved_env = set_exec_env(sk->owner_env);
 
 		/* Applications forget to leave groups before exiting */
 		ip_mc_drop_socket(sk);
@@ -387,6 +405,8 @@ int inet_release(struct socket *sock)
 			timeout = sk->sk_lingertime;
 		sock->sk = NULL;
 		sk->sk_prot->close(sk, timeout);
+
+		(void)set_exec_env(saved_env);
 	}
 	return 0;
 }
@@ -1193,20 +1213,20 @@ static struct net_protocol icmp_protocol
 
 static int __init init_ipv4_mibs(void)
 {
-	net_statistics[0] = alloc_percpu(struct linux_mib);
-	net_statistics[1] = alloc_percpu(struct linux_mib);
-	ip_statistics[0] = alloc_percpu(struct ipstats_mib);
-	ip_statistics[1] = alloc_percpu(struct ipstats_mib);
-	icmp_statistics[0] = alloc_percpu(struct icmp_mib);
-	icmp_statistics[1] = alloc_percpu(struct icmp_mib);
-	tcp_statistics[0] = alloc_percpu(struct tcp_mib);
-	tcp_statistics[1] = alloc_percpu(struct tcp_mib);
-	udp_statistics[0] = alloc_percpu(struct udp_mib);
-	udp_statistics[1] = alloc_percpu(struct udp_mib);
+	ve_net_statistics[0] = alloc_percpu(struct linux_mib);
+	ve_net_statistics[1] = alloc_percpu(struct linux_mib);
+	ve_ip_statistics[0] = alloc_percpu(struct ipstats_mib);
+	ve_ip_statistics[1] = alloc_percpu(struct ipstats_mib);
+	ve_icmp_statistics[0] = alloc_percpu(struct icmp_mib);
+	ve_icmp_statistics[1] = alloc_percpu(struct icmp_mib);
+	ve_tcp_statistics[0] = alloc_percpu(struct tcp_mib);
+	ve_tcp_statistics[1] = alloc_percpu(struct tcp_mib);
+	ve_udp_statistics[0] = alloc_percpu(struct udp_mib);
+	ve_udp_statistics[1] = alloc_percpu(struct udp_mib);
 	if (!
-	    (net_statistics[0] && net_statistics[1] && ip_statistics[0]
-	     && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
-	     && udp_statistics[0] && udp_statistics[1]))
+	    (ve_net_statistics[0] && ve_net_statistics[1] && ve_ip_statistics[0]
+	     && ve_ip_statistics[1] && ve_icmp_statistics[0] && ve_icmp_statistics[1] /* icmp MIBs are allocated above too */
+	     && ve_tcp_statistics[0] && ve_tcp_statistics[1] && ve_udp_statistics[0] && ve_udp_statistics[1]))
 		return -ENOMEM;
 
 	(void) tcp_mib_init();
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/arp.c linux-2.6.16.46-0.12-027test011/net/ipv4/arp.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/arp.c	2007-08-24 19:28:22.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/arp.c	2007-08-28 17:35:32.000000000 +0400
@@ -175,7 +175,7 @@ struct neigh_ops arp_broken_ops = {
 	.queue_xmit =		dev_queue_xmit,
 };
 
-struct neigh_table arp_tbl = {
+struct neigh_table global_arp_tbl = {
 	.family =	AF_INET,
 	.entry_size =	sizeof(struct neighbour) + 4,
 	.key_len =	4,
@@ -184,7 +184,7 @@ struct neigh_table arp_tbl = {
 	.proxy_redo =	parp_redo,
 	.id =		"arp_cache",
 	.parms = {
-		.tbl =			&arp_tbl,
+		.tbl =			&global_arp_tbl,
 		.base_reachable_time =	30 * HZ,
 		.retrans_time =	1 * HZ,
 		.gc_staletime =	60 * HZ,
@@ -988,7 +988,7 @@ static int arp_req_set(struct arpreq *r,
 			return 0;
 		}
 		if (dev == NULL) {
-			ipv4_devconf.proxy_arp = 1;
+			ve_ipv4_devconf.proxy_arp = 1;
 			return 0;
 		}
 		if (__in_dev_get_rtnl(dev)) {
@@ -1094,7 +1094,7 @@ static int arp_req_delete(struct arpreq 
 			return pneigh_delete(&arp_tbl, &ip, dev);
 		if (mask == 0) {
 			if (dev == NULL) {
-				ipv4_devconf.proxy_arp = 0;
+				ve_ipv4_devconf.proxy_arp = 0;
 				return 0;
 			}
 			if (__in_dev_get_rtnl(dev)) {
@@ -1142,7 +1142,8 @@ int arp_ioctl(unsigned int cmd, void __u
 	switch (cmd) {
 		case SIOCDARP:
 		case SIOCSARP:
-			if (!capable(CAP_NET_ADMIN))
+			if (!capable(CAP_NET_ADMIN) &&
+					!capable(CAP_VE_NET_ADMIN))
 				return -EPERM;
 		case SIOCGARP:
 			err = copy_from_user(&r, arg, sizeof(struct arpreq));
@@ -1240,7 +1241,9 @@ static int arp_proc_init(void);
 
 void __init arp_init(void)
 {
-	neigh_table_init(&arp_tbl);
+	get_ve0()->ve_arp_tbl = &global_arp_tbl;
+	if (neigh_table_init(&arp_tbl))
+		panic("cannot initialize ARP tables\n");
 
 	dev_add_pack(&arp_packet_type);
 	arp_proc_init();
@@ -1372,8 +1375,9 @@ static int arp_seq_open(struct inode *in
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
-	struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
-       
+	struct neigh_seq_state *s;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		goto out;
 
@@ -1401,7 +1405,7 @@ static struct file_operations arp_seq_fo
 
 static int __init arp_proc_init(void)
 {
-	if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops))
+	if (!proc_glob_fops_create("net/arp", S_IRUGO, &arp_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
@@ -1421,9 +1425,56 @@ EXPORT_SYMBOL(arp_rcv);
 EXPORT_SYMBOL(arp_create);
 EXPORT_SYMBOL(arp_xmit);
 EXPORT_SYMBOL(arp_send);
-EXPORT_SYMBOL(arp_tbl);
+EXPORT_SYMBOL(global_arp_tbl);
 EXPORT_SYMBOL(arp_direct_ops);
 
 #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
 EXPORT_SYMBOL(clip_tbl_hook);
 #endif
+
+#ifdef CONFIG_VE
+int ve_arp_init(struct ve_struct *ve)	/* allocate and initialise ve->ve_arp_tbl; returns 0, -ENOMEM, or neigh_table_init()'s error */
+{
+	struct ve_struct *old_env;
+	int err;
+
+	ve->ve_arp_tbl = kmalloc(sizeof(struct neigh_table), GFP_KERNEL);
+	if (ve->ve_arp_tbl == NULL)
+		return -ENOMEM;
+
+	*(ve->ve_arp_tbl) = global_arp_tbl;	/* start from a struct copy of the global table */
+	ve->ve_arp_tbl->parms.tbl = ve->ve_arp_tbl;	/* the copied back-pointer still referred to global_arp_tbl */
+	old_env = set_exec_env(ve);	/* switch exec env to ve for the calls below; restored at out: */
+	err = neigh_table_init(ve->ve_arp_tbl);
+	if (err)
+		goto out_free;
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
+			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);	/* NOTE(review): return value ignored; arp_tbl presumably resolves through the current exec env - confirm */
+#endif
+	err = 0;
+
+out:
+	set_exec_env(old_env);
+	return err;
+
+out_free:
+	kfree(ve->ve_arp_tbl);
+	ve->ve_arp_tbl = NULL;	/* so ve_arp_fini() sees nothing to tear down */
+	goto out;
+}
+EXPORT_SYMBOL(ve_arp_init);
+
+void ve_arp_fini(struct ve_struct *ve)	/* tear down and free the table set up by ve_arp_init() */
+{
+	if (ve->ve_arp_tbl) {	/* nothing to do if the table is not set */
+#ifdef CONFIG_SYSCTL
+		neigh_sysctl_unregister(&ve->ve_arp_tbl->parms);
+#endif
+		neigh_table_clear(ve->ve_arp_tbl);
+		kfree(ve->ve_arp_tbl);
+		ve->ve_arp_tbl = NULL;	/* a second call then becomes a no-op */
+	}
+}
+EXPORT_SYMBOL(ve_arp_fini);
+#endif /* CONFIG_VE */
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/devinet.c linux-2.6.16.46-0.12-027test011/net/ipv4/devinet.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/devinet.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/devinet.c	2007-08-28 17:35:36.000000000 +0400
@@ -71,7 +71,7 @@ struct ipv4_devconf ipv4_devconf = {
 	.shared_media =	  1,
 };
 
-static struct ipv4_devconf ipv4_devconf_dflt = {
+struct ipv4_devconf ipv4_devconf_dflt = {
 	.accept_redirects =  1,
 	.send_redirects =    1,
 	.secure_redirects =  1,
@@ -79,10 +79,16 @@ static struct ipv4_devconf ipv4_devconf_
 	.accept_source_route = 1,
 };
 
+#ifdef CONFIG_VE
+#define ve_ipv4_devconf_dflt	(*(get_exec_env()->_ipv4_devconf_dflt))
+#else
+#define ve_ipv4_devconf_dflt	ipv4_devconf_dflt
+#endif
+
 static void rtmsg_ifa(int event, struct in_ifaddr *);
 
 static struct notifier_block *inetaddr_chain;
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy);
 #ifdef CONFIG_SYSCTL
 static void devinet_sysctl_register(struct in_device *in_dev,
@@ -92,7 +98,7 @@ static void devinet_sysctl_unregister(st
 
 /* Locks all the inet devices. */
 
-static struct in_ifaddr *inet_alloc_ifa(void)
+struct in_ifaddr *inet_alloc_ifa(void)
 {
 	struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
 
@@ -103,6 +109,7 @@ static struct in_ifaddr *inet_alloc_ifa(
 
 	return ifa;
 }
+EXPORT_SYMBOL_GPL(inet_alloc_ifa);
 
 static void inet_rcu_free_ifa(struct rcu_head *head)
 {
@@ -176,6 +183,7 @@ out_kfree:
 	in_dev = NULL;
 	goto out;
 }
+EXPORT_SYMBOL_GPL(inetdev_init);
 
 static void in_dev_rcu_put(struct rcu_head *head)
 {
@@ -191,7 +199,7 @@ static void inetdev_destroy(struct in_de
 	ASSERT_RTNL();
 
 	dev = in_dev->dev;
-	if (dev == &loopback_dev)
+	if (dev == &ve0_loopback)
 		return;
 
 	in_dev->dead = 1;
@@ -233,7 +241,7 @@ int inet_addr_onlink(struct in_device *i
 	return 0;
 }
 
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy)
 {
 	struct in_ifaddr *promote = NULL;
@@ -321,7 +329,7 @@ static void inet_del_ifa(struct in_devic
 	}
 }
 
-static int inet_insert_ifa(struct in_ifaddr *ifa)
+int inet_insert_ifa(struct in_ifaddr *ifa)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -371,6 +379,7 @@ static int inet_insert_ifa(struct in_ifa
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(inet_insert_ifa);
 
 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 {
@@ -579,7 +588,7 @@ int devinet_ioctl(unsigned int cmd, void
 
 	case SIOCSIFFLAGS:
 		ret = -EACCES;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_VE_NET_ADMIN))
 			goto out;
 		break;
 	case SIOCSIFADDR:	/* Set interface address (and family) */
@@ -587,7 +596,7 @@ int devinet_ioctl(unsigned int cmd, void
 	case SIOCSIFDSTADDR:	/* Set the destination address */
 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
 		ret = -EACCES;
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_VE_NET_ADMIN))
 			goto out;
 		ret = -EINVAL;
 		if (sin->sin_family != AF_INET)
@@ -1164,10 +1173,10 @@ static struct rtnetlink_link inet_rtnetl
 void inet_forward_change(void)
 {
 	struct net_device *dev;
-	int on = ipv4_devconf.forwarding;
+	int on = ve_ipv4_devconf.forwarding;
 
-	ipv4_devconf.accept_redirects = !on;
-	ipv4_devconf_dflt.forwarding = on;
+	ve_ipv4_devconf.accept_redirects = !on;
+	ve_ipv4_devconf_dflt.forwarding = on;
 
 	read_lock(&dev_base_lock);
 	for (dev = dev_base; dev; dev = dev->next) {
@@ -1192,9 +1201,9 @@ static int devinet_sysctl_forward(ctl_ta
 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
 	if (write && *valp != val) {
-		if (valp == &ipv4_devconf.forwarding)
+		if (valp == &ve_ipv4_devconf.forwarding)
 			inet_forward_change();
-		else if (valp != &ipv4_devconf_dflt.forwarding)
+		else if (valp != &ve_ipv4_devconf_dflt.forwarding)
 			rt_cache_flush(0);
 	}
 
@@ -1465,28 +1474,21 @@ static struct devinet_sysctl_table {
 	},
 };
 
-static void devinet_sysctl_register(struct in_device *in_dev,
-				    struct ipv4_devconf *p)
+static struct devinet_sysctl_table *__devinet_sysctl_register(char *dev_name,
+		int ifindex, struct ipv4_devconf *p)
 {
 	int i;
-	struct net_device *dev = in_dev ? in_dev->dev : NULL;
-	struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
-	char *dev_name = NULL;
+	struct devinet_sysctl_table *t;
 
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (!t)
-		return;
+		goto out;
+
 	memcpy(t, &devinet_sysctl, sizeof(*t));
 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
 		t->devinet_vars[i].de = NULL;
-	}
-
-	if (dev) {
-		dev_name = dev->name; 
-		t->devinet_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+		t->devinet_vars[i].owner_env = get_exec_env();
 	}
 
 	/* 
@@ -1496,8 +1498,9 @@ static void devinet_sysctl_register(stru
 	 */	
 	dev_name = kstrdup(dev_name, GFP_KERNEL);
 	if (!dev_name)
-	    goto free;
+	    goto out_free_table;
 
+	t->devinet_dev[0].ctl_name    = ifindex;
 	t->devinet_dev[0].procname    = dev_name;
 	t->devinet_dev[0].child	      = t->devinet_vars;
 	t->devinet_dev[0].de	      = NULL;
@@ -1510,17 +1513,38 @@ static void devinet_sysctl_register(stru
 
 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
 	if (!t->sysctl_header)
-	    goto free_procname;
+	    goto out_free_procname;
 
-	p->sysctl = t;
-	return;
+	return t;
 
 	/* error path */
- free_procname:
+out_free_procname:
 	kfree(dev_name);
- free:
+out_free_table:
 	kfree(t);
-	return;
+out:
+	printk(KERN_DEBUG "Can't register net/ipv4/conf sysctls.\n");
+	return NULL;
+}
+
+/*
+ * Register the conf sysctls for in_dev's device, or the "default" entry
+ * when there is no device, and store the resulting table in p->sysctl.
+ */
+static void devinet_sysctl_register(struct in_device *in_dev,
+				    struct ipv4_devconf *p)
+{
+	struct net_device *netdev = in_dev ? in_dev->dev : NULL;
+	char *label = "default";
+	int ctl_id = NET_PROTO_CONF_DEFAULT;
+
+	/* A real device overrides the "default" name and ctl index. */
+	if (netdev) {
+		label = netdev->name;
+		ctl_id = netdev->ifindex;
+	}
+
+	p->sysctl = __devinet_sysctl_register(label, ctl_id, p);
+}
 
 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
@@ -1533,7 +1557,175 @@ static void devinet_sysctl_unregister(st
 		kfree(t);
 	}
 }
+
+#ifdef CONFIG_VE
+static ctl_table net_sysctl_tables[] = {	/* template passed to clone_sysctl_template(); flat array, indices noted below */
+	/* 0: net (child level starts at index 2) */
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[2],
+	},
+	{ .ctl_name = 0, },	/* 1: zero entry closing the level above */
+	/* 2: net/ipv4 (child level starts at index 4) */
+	{
+		.ctl_name	= NET_IPV4,
+		.procname	= "ipv4",
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[4],
+	},
+	{ .ctl_name = 0, },	/* 3: zero entry closing the level above */
+	/* 4, 5: net/ipv4/[vars]: ip_forward and the route directory */
+	{
+		.ctl_name	= NET_IPV4_FORWARD,
+		.procname	= "ip_forward",
+		.data		= &ipv4_devconf.forwarding,	/* overridden on the clone by ip_forward_sysctl_register() */
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &ipv4_sysctl_forward,
+		.strategy	= &ipv4_sysctl_forward_strategy,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE,
+		.procname	= "route",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= &net_sysctl_tables[7],
+	},
+	{ .ctl_name = 0 },	/* 6: zero entry closing the level above */
+	/* 7: net/ipv4/route/flush */
+	{
+		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
+		.procname	= "flush",
+		.data		= NULL, /* set on the clone by ip_forward_sysctl_register() */
+		.maxlen		= sizeof(int),
+		.mode		= 0200,
+		.proc_handler	= &ipv4_sysctl_rtcache_flush,
+		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
+	},
+	{ .ctl_name = 0 },	/* 8: zero entry closing the level above */
+};
+
+static int ip_forward_sysctl_register(struct ve_struct *ve,
+		struct ipv4_devconf *p)	/* returns 0, or -ENOMEM on any failure */
+{
+	struct ctl_table_header *hdr;
+	ctl_table *root, *ipv4_table, *route_table;
+
+	root = clone_sysctl_template(net_sysctl_tables);	/* work on a clone, released with free_sysctl_clone() */
+	if (root == NULL)
+		goto out;
+
+	ipv4_table = root->child->child;	/* net -> ipv4 -> { ip_forward, route } */
+	ipv4_table[0].data = &p->forwarding;	/* "ip_forward" now edits p's forwarding field */
+
+	route_table = ipv4_table[1].child;	/* children of the "route" directory */
+	route_table[0].data = &ipv4_flush_delay;	/* "flush" entry; .data is NULL in the template */
+
+	hdr = register_sysctl_table(root, 1);
+	if (hdr == NULL)
+		goto out_free;
+
+	ve->forward_header = hdr;	/* consumed by ip_forward_sysctl_unregister() */
+	ve->forward_table = root;	/* consumed by ip_forward_sysctl_free() */
+	return 0;
+
+out_free:
+	free_sysctl_clone(root);
+out:
+	return -ENOMEM;
+}
+
+static inline void ip_forward_sysctl_unregister(struct ve_struct *ve)
+{
+	unregister_sysctl_table(ve->forward_header);
+	ve->forward_header = NULL;
+}
+
+static inline void ip_forward_sysctl_free(struct ve_struct *ve)
+{
+	if (ve->forward_table == NULL)
+		return;
+
+	free_sysctl_clone(ve->forward_table);
+	ve->forward_table = NULL;
+}
 #endif
+#endif
+
+int devinet_sysctl_init(struct ve_struct *ve)	/* give ve its own "all"/"default" devconf copies plus sysctls; 0 on success or when compiled out */
+{
+	int err = 0;
+#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_VE
+	struct ipv4_devconf *conf, *conf_def;
+
+	err = -ENOMEM;	/* result for every allocation/registration failure below */
+
+	conf = kmalloc(sizeof(*conf), GFP_KERNEL);
+	if (!conf)
+		goto err1;
+
+	memcpy(conf, &ipv4_devconf, sizeof(*conf));	/* seed from the global ipv4_devconf */
+	conf->sysctl = __devinet_sysctl_register("all",
+			NET_PROTO_CONF_ALL, conf);
+	if (!conf->sysctl)
+		goto err2;
+
+	conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL);
+	if (!conf_def)
+		goto err3;
+
+	memcpy(conf_def, &ipv4_devconf_dflt, sizeof(*conf_def));	/* seed from the global defaults */
+	conf_def->sysctl = __devinet_sysctl_register("default",
+			NET_PROTO_CONF_DEFAULT, conf_def);
+	if (!conf_def->sysctl)
+		goto err4;
+
+	err = ip_forward_sysctl_register(ve, conf);
+	if (err)
+		goto err5;
+
+	ve->_ipv4_devconf = conf;	/* stored in ve only after every step succeeded */
+	ve->_ipv4_devconf_dflt = conf_def;
+	return 0;
+
+err5:	/* unwind in reverse order of setup */
+	devinet_sysctl_unregister(conf_def);
+err4:
+	kfree(conf_def);
+err3:
+	devinet_sysctl_unregister(conf);
+err2:
+	kfree(conf);
+err1:
+#endif
+#endif
+	return err;
+}
+
+void devinet_sysctl_fini(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_VE
+	ip_forward_sysctl_unregister(ve);
+	devinet_sysctl_unregister(ve->_ipv4_devconf);
+	devinet_sysctl_unregister(ve->_ipv4_devconf_dflt);
+#endif
+#endif
+}
+
+void devinet_sysctl_free(struct ve_struct *ve)
+{
+#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_VE
+	ip_forward_sysctl_free(ve);
+	kfree(ve->_ipv4_devconf);
+	kfree(ve->_ipv4_devconf_dflt);
+#endif
+#endif
+}
 
 void __init devinet_init(void)
 {
@@ -1543,13 +1735,18 @@ void __init devinet_init(void)
 #ifdef CONFIG_SYSCTL
 	devinet_sysctl.sysctl_header =
 		register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
-	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
+	__devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
+			&ipv4_devconf_dflt);
 #endif
 }
 
 EXPORT_SYMBOL(devinet_ioctl);
 EXPORT_SYMBOL(in_dev_finish_destroy);
 EXPORT_SYMBOL(inet_select_addr);
+EXPORT_SYMBOL(inet_del_ifa);
 EXPORT_SYMBOL(inetdev_by_index);
+EXPORT_SYMBOL(devinet_sysctl_init);
+EXPORT_SYMBOL(devinet_sysctl_fini);
+EXPORT_SYMBOL(devinet_sysctl_free);
 EXPORT_SYMBOL(register_inetaddr_notifier);
 EXPORT_SYMBOL(unregister_inetaddr_notifier);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/fib_frontend.c linux-2.6.16.46-0.12-027test011/net/ipv4/fib_frontend.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/fib_frontend.c	2007-08-24 19:28:32.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/fib_frontend.c	2007-08-28 17:35:32.000000000 +0400
@@ -53,14 +53,46 @@
 
 #define RT_TABLE_MIN RT_TABLE_MAIN
 
+#undef ip_fib_local_table
+#undef ip_fib_main_table
 struct fib_table *ip_fib_local_table;
 struct fib_table *ip_fib_main_table;
+void prepare_fib_tables(void)	/* copy the global table pointers into VE0, then overwrite the globals with a marker value */
+{
+#ifdef CONFIG_VE	/* NOTE(review): guarded by CONFIG_VE, while the accessor macros below are guarded by CONFIG_VE_NETDEV - confirm intended */
+	get_ve0()->_local_table = ip_fib_local_table;
+	ip_fib_local_table = (struct fib_table *)0x12345678;	/* not a real pointer; presumably so stray uses of the global fault - confirm */
+	get_ve0()->_main_table = ip_fib_main_table;
+	ip_fib_main_table = (struct fib_table *)0x12345678;
+#endif
+}
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define ip_fib_local_table 	get_exec_env()->_local_table
+#define ip_fib_main_table 	get_exec_env()->_main_table
+#endif
 
 #else
 
 #define RT_TABLE_MIN 1
 
+#undef fib_tables
 struct fib_table *fib_tables[RT_TABLE_MAX+1];
+void prepare_fib_tables(void)	/* copy the global fib_tables[] into VE0, then overwrite the global slots with a marker value */
+{
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	int i;
+
+	BUG_ON(sizeof(fib_tables) !=
+		sizeof(((struct ve_struct *)0)->_fib_tables));	/* the memcpy below relies on both arrays having the same size */
+	memcpy(get_ve0()->_fib_tables, fib_tables, sizeof(fib_tables));
+	for (i = 0; i <= RT_TABLE_MAX; i++)
+		fib_tables[i] = (void *)0x12366678;	/* NOTE(review): differs from the 0x12345678 marker used by the single-table variant - confirm intentional */
+#endif
+}
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+#define fib_tables get_exec_env()->_fib_tables
+#endif
 
 struct fib_table *__fib_new_table(int id)
 {
@@ -187,7 +219,8 @@ int fib_validate_source(u32 src, u32 dst
 
 	if (fib_lookup(&fl, &res))
 		goto last_resort;
-	if (res.type != RTN_UNICAST)
+	if (res.type != RTN_UNICAST &&
+		(!(dev->features & NETIF_F_VENET) || res.type != RTN_LOCAL))
 		goto e_inval_res;
 	*spec_dst = FIB_RES_PREFSRC(res);
 	fib_combine_itag(itag, &res);
@@ -250,7 +283,7 @@ int ip_rt_ioctl(unsigned int cmd, void _
 	switch (cmd) {
 	case SIOCADDRT:		/* Add a route */
 	case SIOCDELRT:		/* Delete a route */
-		if (!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_VE_NET_ADMIN))
 			return -EPERM;
 		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
 			return -EFAULT;
@@ -663,6 +696,7 @@ static struct notifier_block fib_netdev_
 
 void __init ip_fib_init(void)
 {
+	prepare_fib_tables();
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/fib_hash.c linux-2.6.16.46-0.12-027test011/net/ipv4/fib_hash.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/fib_hash.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/fib_hash.c	2007-08-28 17:35:32.000000000 +0400
@@ -36,6 +36,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
+#include <linux/ve.h>
 
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -73,11 +74,6 @@ struct fn_zone {
  * can be cheaper than memory lookup, so that FZ_* macros are used.
  */
 
-struct fn_hash {
-	struct fn_zone	*fn_zones[33];
-	struct fn_zone	*fn_zone_list;
-};
-
 static inline u32 fn_hash(u32 key, struct fn_zone *fz)
 {
 	u32 h = ntohl(key)>>(32 - fz->fz_order);
@@ -623,7 +619,7 @@ fn_hash_delete(struct fib_table *tb, str
 	return -ESRCH;
 }
 
-static int fn_flush_list(struct fn_zone *fz, int idx)
+static int fn_flush_list(struct fn_zone *fz, int idx, int destroy)
 {
 	struct hlist_head *head = &fz->fz_hash[idx];
 	struct hlist_node *node, *n;
@@ -638,7 +634,9 @@ static int fn_flush_list(struct fn_zone 
 		list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
 			struct fib_info *fi = fa->fa_info;
 
-			if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
+			if (fi == NULL)
+				continue;
+			if (destroy || (fi->fib_flags&RTNH_F_DEAD)) {
 				write_lock_bh(&fib_hash_lock);
 				list_del(&fa->fa_list);
 				if (list_empty(&f->fn_alias)) {
@@ -660,7 +658,7 @@ static int fn_flush_list(struct fn_zone 
 	return found;
 }
 
-static int fn_hash_flush(struct fib_table *tb)
+static int __fn_hash_flush(struct fib_table *tb, int destroy)
 {
 	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 	struct fn_zone *fz;
@@ -670,11 +668,99 @@ static int fn_hash_flush(struct fib_tabl
 		int i;
 
 		for (i = fz->fz_divisor - 1; i >= 0; i--)
-			found += fn_flush_list(fz, i);
+			found += fn_flush_list(fz, i, destroy);
 	}
 	return found;
 }
 
+static int fn_hash_flush(struct fib_table *tb)
+{
+	return __fn_hash_flush(tb, 0);	/* destroy=0: drop only aliases whose fib_info is RTNH_F_DEAD */
+}
+
+#ifdef CONFIG_VE
+static void fn_free_zones(struct fib_table *tb)
+{
+	struct fn_hash *hash = (struct fn_hash *) tb->tb_data;
+	struct fn_zone *zone = hash->fn_zone_list;
+
+	for (; zone != NULL; zone = hash->fn_zone_list) {
+		hash->fn_zone_list = zone->fz_next;	/* unlink before freeing */
+		fz_hash_free(zone->fz_hash, zone->fz_divisor);
+		kfree(zone);
+	}
+}
+
+void fib_hash_destroy(struct fib_table *tb)
+{
+	__fn_hash_flush(tb, 1);	/* destroy=1: unlink every alias that has a fib_info */
+	fn_free_zones(tb);	/* then release the zones and their hash buckets */
+	kfree(tb);		/* finally the table itself */
+}
+
+/*
+ * Create the LOCAL and MAIN FIB tables of @ve; returns 0 or -ENOMEM.
+ */
+int init_ve_route(struct ve_struct *ve)
+{
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	if (fib_rules_create())	/* NOTE(review): fills get_exec_env(), not @ve -- presumably the same VE */
+		return -ENOMEM;
+	ve->_fib_tables[RT_TABLE_LOCAL] = fib_hash_init(RT_TABLE_LOCAL);
+	if (!ve->_fib_tables[RT_TABLE_LOCAL])
+		goto out_destroy;
+	ve->_fib_tables[RT_TABLE_MAIN] = fib_hash_init(RT_TABLE_MAIN);
+	if (!ve->_fib_tables[RT_TABLE_MAIN])
+		goto out_destroy_local;
+
+	return 0;
+
+out_destroy_local:
+	fib_hash_destroy(ve->_fib_tables[RT_TABLE_LOCAL]);	/* NOTE(review): slot keeps the freed pointer */
+out_destroy:
+	fib_rules_destroy();	/* undo fib_rules_create() */
+	ve->_local_rule = NULL;
+	return -ENOMEM;
+#else
+	ve->_local_table = fib_hash_init(RT_TABLE_LOCAL);
+	if (!ve->_local_table)
+		return -ENOMEM;
+	ve->_main_table = fib_hash_init(RT_TABLE_MAIN);
+	if (!ve->_main_table) {
+		fib_hash_destroy(ve->_local_table);	/* NOTE(review): _local_table keeps the freed pointer */
+		return -ENOMEM;
+	}
+	return 0;
+#endif
+}
+
+void fini_ve_route(struct ve_struct *ve)
+{
+	unsigned int hash_bytes;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	int id;
+
+	/* Destroy every FIB table the VE holds, then drop its rule list. */
+	for (id = 0; id <= RT_TABLE_MAX; id++)
+		if (ve->_fib_tables[id] != NULL)
+			fib_hash_destroy(ve->_fib_tables[id]);
+	fib_rules_destroy();
+	ve->_local_rule = NULL;
+#else
+	fib_hash_destroy(ve->_local_table);
+	fib_hash_destroy(ve->_main_table);
+#endif
+	hash_bytes = ve->_fib_hash_size * sizeof(struct hlist_head *);
+	fib_hash_free(ve->_fib_info_hash, hash_bytes);
+	fib_hash_free(ve->_fib_info_laddrhash, hash_bytes);
+	ve->_fib_info_laddrhash = NULL;
+	ve->_fib_info_hash = NULL;
+}
+
+EXPORT_SYMBOL(init_ve_route);
+EXPORT_SYMBOL(fini_ve_route);
+#endif
+
 
 static inline int
 fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
@@ -766,7 +852,7 @@ static int fn_hash_dump(struct fib_table
 	return skb->len;
 }
 
-#ifdef CONFIG_IP_MULTIPLE_TABLES
+#if defined(CONFIG_IP_MULTIPLE_TABLES) || defined(CONFIG_VE)
 struct fib_table * fib_hash_init(int id)
 #else
 struct fib_table * __init fib_hash_init(int id)
@@ -1076,13 +1162,13 @@ static struct file_operations fib_seq_fo
 
 int __init fib_proc_init(void)
 {
-	if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
+	if (!proc_glob_fops_create("net/route", S_IRUGO, &fib_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void __init fib_proc_exit(void)
 {
-	proc_net_remove("route");
+	remove_proc_glob_entry("net/route", NULL);
 }
 #endif /* CONFIG_PROC_FS */
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/fib_lookup.h linux-2.6.16.46-0.12-027test011/net/ipv4/fib_lookup.h
--- linux-2.6.16.46-0.12.orig/net/ipv4/fib_lookup.h	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/fib_lookup.h	2007-08-28 17:35:32.000000000 +0400
@@ -41,5 +41,6 @@ extern struct fib_alias *fib_find_alias(
 extern int fib_detect_death(struct fib_info *fi, int order,
 			    struct fib_info **last_resort,
 			    int *last_idx, int *dflt);
+void fib_hash_free(struct hlist_head *hash, int bytes);
 
 #endif /* _FIB_LOOKUP_H */
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/fib_rules.c linux-2.6.16.46-0.12-027test011/net/ipv4/fib_rules.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/fib_rules.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/fib_rules.c	2007-08-28 17:35:32.000000000 +0400
@@ -39,6 +39,7 @@
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/rtnetlink.h>
 #include <linux/init.h>
 
 #include <net/ip.h>
@@ -99,9 +100,95 @@ static struct fib_rule local_rule = {
 	.r_action =	RTN_UNICAST,
 };
 
-static struct fib_rule *fib_rules = &local_rule;
 static DEFINE_RWLOCK(fib_rules_lock);
 
+void __init prepare_fib_rules(void)
+{
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	get_ve0()->_local_rule = &local_rule;	/* VE0 keeps using the static local_rule ... */
+	get_ve0()->_fib_rules = &local_rule;	/* ... which is also the head of its rule list */
+#endif
+}
+
+#ifdef CONFIG_VE
+#define local_rule (*(get_exec_env()->_local_rule))
+#define fib_rules (get_exec_env()->_fib_rules)
+#else
+static struct fib_rule *fib_rules = &local_rule;
+#endif
+
+#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
+int fib_rules_create(void)
+{
+#ifdef CONFIG_VE
+	struct fib_rule *dflt, *mainr, *local;
+
+	/* Build the chain local (0) -> main (0x7FFE) -> default (0x7FFF). */
+	dflt = kmalloc(sizeof(*dflt), GFP_KERNEL);
+	if (!dflt)
+		return -1;
+	memset(dflt, 0, sizeof(*dflt));
+	dflt->r_action = RTN_UNICAST;
+	dflt->r_table = RT_TABLE_DEFAULT;
+	dflt->r_preference = 0x7FFF;
+	atomic_set(&dflt->r_clntref, 1);
+
+	mainr = kmalloc(sizeof(*mainr), GFP_KERNEL);
+	if (!mainr)
+		goto free_dflt;
+	memset(mainr, 0, sizeof(*mainr));
+	mainr->r_next = dflt;
+	mainr->r_action = RTN_UNICAST;
+	mainr->r_table = RT_TABLE_MAIN;
+	mainr->r_preference = 0x7FFE;
+	atomic_set(&mainr->r_clntref, 1);
+
+	local = kmalloc(sizeof(*local), GFP_KERNEL);
+	if (!local)
+		goto free_main;
+	memset(local, 0, sizeof(*local));
+	local->r_next = mainr;
+	local->r_action = RTN_UNICAST;
+	local->r_table = RT_TABLE_LOCAL;
+	local->r_preference = 0;
+	atomic_set(&local->r_clntref, 1);
+
+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
+	get_exec_env()->_fib_rules = local;
+	get_exec_env()->_local_rule = local;
+#endif
+
+	return 0;
+
+free_main:
+	kfree(mainr);
+free_dflt:
+	kfree(dflt);
+	return -1;
+#else
+	return 0;
+#endif
+}
+
+void fib_rules_destroy(void)
+{
+#ifdef CONFIG_VE
+	struct fib_rule *victim;
+
+	rtnl_lock();
+	write_lock_bh(&fib_rules_lock);
+	/* Unlink from the head, mark dead, release via fib_rule_put(). */
+	for (victim = fib_rules; victim != NULL; victim = fib_rules) {
+		fib_rules = victim->r_next;
+		victim->r_dead = 1;
+		fib_rule_put(victim);
+	}
+	write_unlock_bh(&fib_rules_lock);
+	rtnl_unlock();
+#endif
+}
+#endif
+
 int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct rtattr **rta = arg;
@@ -435,5 +522,6 @@ int inet_dump_rules(struct sk_buff *skb,
 
 void __init fib_rules_init(void)
 {
+	prepare_fib_rules();
 	register_netdevice_notifier(&fib_rules_notifier);
 }
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/fib_semantics.c linux-2.6.16.46-0.12-027test011/net/ipv4/fib_semantics.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/fib_semantics.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/fib_semantics.c	2007-08-28 17:35:32.000000000 +0400
@@ -33,6 +33,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
+#include <linux/ve.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
@@ -56,6 +57,24 @@ static struct hlist_head *fib_info_laddr
 static unsigned int fib_hash_size;
 static unsigned int fib_info_cnt;
 
+void prepare_fib_info(void)
+{
+#ifdef CONFIG_VE
+	get_ve0()->_fib_info_hash = fib_info_hash;	/* copy the file-scope fib_info state into VE0 ... */
+	get_ve0()->_fib_info_laddrhash = fib_info_laddrhash;
+	get_ve0()->_fib_hash_size = fib_hash_size;
+	get_ve0()->_fib_info_cnt = fib_info_cnt;	/* ... before the macros below redirect these names to get_exec_env() */
+#endif
+}
+
+#ifdef CONFIG_VE
+#define fib_info_hash (get_exec_env()->_fib_info_hash)
+#define fib_info_laddrhash (get_exec_env()->_fib_info_laddrhash)
+#define fib_hash_size (get_exec_env()->_fib_hash_size)
+#define fib_info_cnt (get_exec_env()->_fib_info_cnt)
+#endif
+
+
 #define DEVINDEX_HASHBITS 8
 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
@@ -235,13 +254,15 @@ static struct fib_info *fib_find_info(co
 	return NULL;
 }
 
-static inline unsigned int fib_devindex_hashfn(unsigned int val)
+static inline unsigned int fib_devindex_hashfn(unsigned int val,
+		envid_t veid)
 {
 	unsigned int mask = DEVINDEX_HASHSIZE - 1;
 
 	return (val ^
 		(val >> DEVINDEX_HASHBITS) ^
-		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
+		(val >> (DEVINDEX_HASHBITS * 2)) ^
+		(veid ^ (veid >> 16))) & mask;
 }
 
 /* Check, that the gateway is already configured.
@@ -257,7 +278,7 @@ int ip_fib_check_default(u32 gw, struct 
 
 	read_lock(&fib_info_lock);
 
-	hash = fib_devindex_hashfn(dev->ifindex);
+	hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
 	head = &fib_info_devhash[hash];
 	hlist_for_each_entry(nh, node, head, nh_hash) {
 		if (nh->nh_dev == dev &&
@@ -580,7 +601,7 @@ static struct hlist_head *fib_hash_alloc
 			__get_free_pages(GFP_KERNEL, get_order(bytes));
 }
 
-static void fib_hash_free(struct hlist_head *hash, int bytes)
+void fib_hash_free(struct hlist_head *hash, int bytes)
 {
 	if (!hash)
 		return;
@@ -837,7 +858,8 @@ link_it:
 
 		if (!nh->nh_dev)
 			continue;
-		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
+		hash = fib_devindex_hashfn(nh->nh_dev->ifindex,
+				VEID(nh->nh_dev->owner_env));
 		head = &fib_info_devhash[hash];
 		hlist_add_head(&nh->nh_hash, head);
 	} endfor_nexthops(fi)
@@ -1184,7 +1206,8 @@ int fib_sync_down(u32 local, struct net_
 
 	if (dev) {
 		struct fib_info *prev_fi = NULL;
-		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+		unsigned int hash = fib_devindex_hashfn(dev->ifindex,
+				VEID(dev->owner_env));
 		struct hlist_head *head = &fib_info_devhash[hash];
 		struct hlist_node *node;
 		struct fib_nh *nh;
@@ -1249,7 +1272,7 @@ int fib_sync_up(struct net_device *dev)
 		return 0;
 
 	prev_fi = NULL;
-	hash = fib_devindex_hashfn(dev->ifindex);
+	hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
 	head = &fib_info_devhash[hash];
 	ret = 0;
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/igmp.c linux-2.6.16.46-0.12-027test011/net/ipv4/igmp.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/igmp.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/igmp.c	2007-08-28 17:35:32.000000000 +0400
@@ -1732,7 +1732,7 @@ int ip_mc_join_group(struct sock *sk , s
 	if (!MULTICAST(addr))
 		return -EINVAL;
 
-	rtnl_shlock();
+	rtnl_lock();
 
 	in_dev = ip_mc_find_dev(imr);
 
@@ -1765,7 +1765,7 @@ int ip_mc_join_group(struct sock *sk , s
 	ip_mc_inc_group(in_dev, addr);
 	err = 0;
 done:
-	rtnl_shunlock();
+	rtnl_unlock();
 	return err;
 }
 
@@ -1839,7 +1839,7 @@ int ip_mc_source(int add, int omode, str
 	if (!MULTICAST(addr))
 		return -EINVAL;
 
-	rtnl_shlock();
+	rtnl_lock();
 
 	imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr;
 	imr.imr_address.s_addr = mreqs->imr_interface;
@@ -1949,7 +1949,7 @@ int ip_mc_source(int add, int omode, str
 	ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 1, 
 		&mreqs->imr_sourceaddr, 1);
 done:
-	rtnl_shunlock();
+	rtnl_unlock();
 	if (leavegroup)
 		return ip_mc_leave_group(sk, &imr);
 	return err;
@@ -1972,7 +1972,7 @@ int ip_mc_msfilter(struct sock *sk, stru
 	    msf->imsf_fmode != MCAST_EXCLUDE)
 		return -EINVAL;
 
-	rtnl_shlock();
+	rtnl_lock();
 
 	imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
 	imr.imr_address.s_addr = msf->imsf_interface;
@@ -2032,7 +2032,7 @@ int ip_mc_msfilter(struct sock *sk, stru
 	pmc->sfmode = msf->imsf_fmode;
 	err = 0;
 done:
-	rtnl_shunlock();
+	rtnl_unlock();
 	if (leavegroup)
 		err = ip_mc_leave_group(sk, &imr);
 	return err;
@@ -2052,7 +2052,7 @@ int ip_mc_msfget(struct sock *sk, struct
 	if (!MULTICAST(addr))
 		return -EINVAL;
 
-	rtnl_shlock();
+	rtnl_lock();
 
 	imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
 	imr.imr_address.s_addr = msf->imsf_interface;
@@ -2074,7 +2074,7 @@ int ip_mc_msfget(struct sock *sk, struct
 		goto done;
 	msf->imsf_fmode = pmc->sfmode;
 	psl = pmc->sflist;
-	rtnl_shunlock();
+	rtnl_unlock();
 	if (!psl) {
 		len = 0;
 		count = 0;
@@ -2093,7 +2093,7 @@ int ip_mc_msfget(struct sock *sk, struct
 		return -EFAULT;
 	return 0;
 done:
-	rtnl_shunlock();
+	rtnl_unlock();
 	return err;
 }
 
@@ -2114,7 +2114,7 @@ int ip_mc_gsfget(struct sock *sk, struct
 	if (!MULTICAST(addr))
 		return -EINVAL;
 
-	rtnl_shlock();
+	rtnl_lock();
 
 	err = -EADDRNOTAVAIL;
 
@@ -2127,7 +2127,7 @@ int ip_mc_gsfget(struct sock *sk, struct
 		goto done;
 	gsf->gf_fmode = pmc->sfmode;
 	psl = pmc->sflist;
-	rtnl_shunlock();
+	rtnl_unlock();
 	count = psl ? psl->sl_count : 0;
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
 	gsf->gf_numsrc = count;
@@ -2148,7 +2148,7 @@ int ip_mc_gsfget(struct sock *sk, struct
 	}
 	return 0;
 done:
-	rtnl_shunlock();
+	rtnl_unlock();
 	return err;
 }
 
@@ -2264,6 +2264,8 @@ static inline struct ip_mc_list *igmp_mc
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct in_device *in_dev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		in_dev = in_dev_get(state->dev);
 		if (!in_dev)
 			continue;
@@ -2293,6 +2295,8 @@ static struct ip_mc_list *igmp_mc_get_ne
 			state->in_dev = NULL;
 			break;
 		}
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		state->in_dev = in_dev_get(state->dev);
 		if (!state->in_dev)
 			continue;
@@ -2427,6 +2431,8 @@ static inline struct ip_sf_list *igmp_mc
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct in_device *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
@@ -2466,6 +2472,8 @@ static struct ip_sf_list *igmp_mcf_get_n
 				state->idev = NULL;
 				goto out;
 			}
+			if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+				continue;
 			state->idev = in_dev_get(state->dev);
 			if (!state->idev)
 				continue;
@@ -2586,8 +2594,8 @@ static struct file_operations igmp_mcf_s
 
 int __init igmp_mc_proc_init(void)
 {
-	proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops);
-	proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	proc_glob_fops_create("net/igmp", S_IRUGO, &igmp_mc_seq_fops);
+	proc_glob_fops_create("net/mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
 	return 0;
 }
 #endif
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/inet_connection_sock.c linux-2.6.16.46-0.12-027test011/net/ipv4/inet_connection_sock.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/inet_connection_sock.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/inet_connection_sock.c	2007-08-28 17:35:32.000000000 +0400
@@ -25,6 +25,9 @@
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
 
+#include <ub/ub_net.h>
+#include <ub/ub_orphan.h>
+
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
 EXPORT_SYMBOL(inet_csk_timer_bug_msg);
@@ -48,6 +51,7 @@ int inet_csk_bind_conflict(const struct 
 	sk_for_each_bound(sk2, node, &tb->owners) {
 		if (sk != sk2 &&
 		    !inet_v6_ipv6only(sk2) &&
+		    ve_accessible_strict(sk->owner_env, sk2->owner_env) &&
 		    (!sk->sk_bound_dev_if ||
 		     !sk2->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
@@ -77,7 +81,9 @@ int inet_csk_get_port(struct inet_hashin
 	struct hlist_node *node;
 	struct inet_bind_bucket *tb;
 	int ret;
+	struct ve_struct *env;
 
+	env = sk->owner_env;
 	local_bh_disable();
 	if (!snum) {
 		int low = sysctl_local_port_range[0];
@@ -85,12 +91,22 @@ int inet_csk_get_port(struct inet_hashin
 		int remaining = (high - low) + 1;
 		int rover = net_random() % (high - low) + low;
 
+		/* Below we treat low > high as high == low. So do here. Den */
+		if (remaining < 1) {
+			remaining = 1;
+			rover = low;
+		}
+
 		do {
-			head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
+			head = &hashinfo->bhash[inet_bhashfn(rover,
+					hashinfo->bhash_size, VEID(env))];
 			spin_lock(&head->lock);
-			inet_bind_bucket_for_each(tb, node, &head->chain)
+			inet_bind_bucket_for_each(tb, node, &head->chain) {
+				if (!ve_accessible_strict(tb->owner_env, env))
+					continue;
 				if (tb->port == rover)
 					goto next;
+			}
 			break;
 		next:
 			spin_unlock(&head->lock);
@@ -113,11 +129,15 @@ int inet_csk_get_port(struct inet_hashin
 		 */
 		snum = rover;
 	} else {
-		head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
+		head = &hashinfo->bhash[inet_bhashfn(snum,
+				hashinfo->bhash_size, VEID(env))];
 		spin_lock(&head->lock);
-		inet_bind_bucket_for_each(tb, node, &head->chain)
+		inet_bind_bucket_for_each(tb, node, &head->chain) {
+			if (!ve_accessible_strict(tb->owner_env, env))
+				continue;
 			if (tb->port == snum)
 				goto tb_found;
+		}
 	}
 	tb = NULL;
 	goto tb_not_found;
@@ -136,7 +156,7 @@ tb_found:
 	}
 tb_not_found:
 	ret = 1;
-	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
+	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum, env)) == NULL)
 		goto fail_unlock;
 	if (hlist_empty(&tb->owners)) {
 		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
@@ -541,7 +561,7 @@ void inet_csk_destroy_sock(struct sock *
 
 	sk_refcnt_debug_release(sk);
 
-	atomic_dec(sk->sk_prot->orphan_count);
+	ub_dec_orphan_count(sk);
 	sock_put(sk);
 }
 
@@ -621,7 +641,7 @@ void inet_csk_listen_stop(struct sock *s
 
 		sock_orphan(child);
 
-		atomic_inc(sk->sk_prot->orphan_count);
+		ub_inc_orphan_count(sk);
 
 		inet_csk_destroy_sock(child);
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/inet_diag.c linux-2.6.16.46-0.12-027test011/net/ipv4/inet_diag.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/inet_diag.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/inet_diag.c	2007-08-28 17:35:32.000000000 +0400
@@ -673,7 +673,9 @@ static int inet_diag_dump(struct sk_buff
 	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
 	const struct inet_diag_handler *handler;
 	struct inet_hashinfo *hashinfo;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	handler = inet_diag_table[cb->nlh->nlmsg_type];
 	BUG_ON(handler == NULL);
 	hashinfo = handler->idiag_hashinfo;
@@ -694,6 +696,8 @@ static int inet_diag_dump(struct sk_buff
 			sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
 				struct inet_sock *inet = inet_sk(sk);
 
+				if (!ve_accessible(sk->owner_env, ve))
+					continue;
 				if (num < s_num) {
 					num++;
 					continue;
@@ -754,6 +758,8 @@ skip_listen_ht:
 		sk_for_each(sk, node, &head->chain) {
 			struct inet_sock *inet = inet_sk(sk);
 
+			if (!ve_accessible(sk->owner_env, ve))
+				continue;
 			if (num < s_num)
 				goto next_normal;
 			if (!(r->idiag_states & (1 << sk->sk_state)))
@@ -778,6 +784,8 @@ next_normal:
 			inet_twsk_for_each(tw, node,
 				    &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
 
+				if (!ve_accessible_veid(tw->tw_owner_env, VEID(ve)))
+					continue; /* tw is this loop's cursor; sk is stale here */
 				if (num < s_num)
 					goto next_dying;
 				if (r->id.idiag_sport != tw->tw_sport &&
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/inet_hashtables.c linux-2.6.16.46-0.12-027test011/net/ipv4/inet_hashtables.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/inet_hashtables.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/inet_hashtables.c	2007-08-28 17:35:32.000000000 +0400
@@ -30,7 +30,8 @@
  */
 struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep,
 						 struct inet_bind_hashbucket *head,
-						 const unsigned short snum)
+						 const unsigned short snum,
+						 struct ve_struct *ve)
 {
 	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC);
 
@@ -38,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucke
 		tb->port      = snum;
 		tb->fastreuse = 0;
 		INIT_HLIST_HEAD(&tb->owners);
+		tb->owner_env = ve;
 		hlist_add_head(&tb->node, &head->chain);
 	}
 	return tb;
@@ -71,10 +73,13 @@ EXPORT_SYMBOL(inet_bind_hash);
  */
 static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
 {
-	const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
-	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
+	int bhash;
+	struct inet_bind_hashbucket *head;
 	struct inet_bind_bucket *tb;
 
+	bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size,
+			VEID(sk->owner_env));
+	head = &hashinfo->bhash[bhash];
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
 	__sk_del_bind_node(sk);
@@ -130,7 +135,8 @@ EXPORT_SYMBOL(inet_listen_wlock);
  * wildcarded during the search since they can never be otherwise.
  */
 struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
-				    const unsigned short hnum, const int dif)
+				    const unsigned short hnum, const int dif,
+				    struct ve_struct *env)
 {
 	struct sock *result = NULL, *sk;
 	const struct hlist_node *node;
@@ -139,6 +145,8 @@ struct sock *__inet_lookup_listener(cons
 	sk_for_each(sk, node, head) {
 		const struct inet_sock *inet = inet_sk(sk);
 
+		if (!ve_accessible_strict(sk->owner_env, env))
+			continue;
 		if (inet->num == hnum && !ipv6_only_sock(sk)) {
 			const __u32 rcv_saddr = inet->rcv_saddr;
 			int score = sk->sk_family == PF_INET ? 1 : 0;
@@ -169,7 +177,8 @@ EXPORT_SYMBOL_GPL(__inet_lookup_listener
 /* called with local bh disabled */
 static int __inet_check_established(struct inet_timewait_death_row *death_row,
 				    struct sock *sk, __u16 lport,
-				    struct inet_timewait_sock **twp)
+				    struct inet_timewait_sock **twp,
+				    struct ve_struct *ve)
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
@@ -178,12 +187,15 @@ static int __inet_check_established(stru
 	int dif = sk->sk_bound_dev_if;
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
 	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+	unsigned int hash;
+	struct inet_ehash_bucket *head;
 	struct sock *sk2;
 	const struct hlist_node *node;
 	struct inet_timewait_sock *tw;
 
+	hash = inet_ehashfn(daddr, lport, saddr, inet->dport, VEID(ve));
+	head = inet_ehash_bucket(hinfo, hash);
+
 	prefetch(head->chain.first);
 	write_lock(&head->lock);
 
@@ -191,7 +203,8 @@ static int __inet_check_established(stru
 	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
 		tw = inet_twsk(sk2);
 
-		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
+		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr,
+					ports, dif, ve)) {
 			if (twsk_unique(sk, sk2, twp))
 				goto unique;
 			else
@@ -202,7 +215,8 @@ static int __inet_check_established(stru
 
 	/* And established part... */
 	sk_for_each(sk2, node, &head->chain) {
-		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
+		if (INET_MATCH(sk2, hash, acookie, saddr, daddr,
+					ports, dif, ve))
 			goto not_unique;
 	}
 
@@ -253,7 +267,9 @@ int inet_hash_connect(struct inet_timewa
  	struct inet_bind_hashbucket *head;
  	struct inet_bind_bucket *tb;
 	int ret;
+	struct ve_struct *ve;
 
+	ve = sk->owner_env;
  	if (!snum) {
  		int low = sysctl_local_port_range[0];
  		int high = sysctl_local_port_range[1];
@@ -268,7 +284,8 @@ int inet_hash_connect(struct inet_timewa
  		local_bh_disable();
 		for (i = 1; i <= range; i++) {
 			port = low + (i + offset) % range;
- 			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ 			head = &hinfo->bhash[inet_bhashfn(port,
+					hinfo->bhash_size, VEID(ve))];
  			spin_lock(&head->lock);
 
  			/* Does not bother with rcv_saddr checks,
@@ -276,19 +293,21 @@ int inet_hash_connect(struct inet_timewa
  			 * unique enough.
  			 */
 			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == port) {
+ 				if (tb->port == port &&
+				    ve_accessible_strict(tb->owner_env, ve)) {
  					BUG_TRAP(!hlist_empty(&tb->owners));
  					if (tb->fastreuse >= 0)
  						goto next_port;
  					if (!__inet_check_established(death_row,
 								      sk, port,
-								      &tw))
+								      &tw, ve))
  						goto ok;
  					goto next_port;
  				}
  			}
 
- 			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
+ 			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+					head, port, ve);
  			if (!tb) {
  				spin_unlock(&head->lock);
  				break;
@@ -323,7 +342,7 @@ ok:
 		goto out;
  	}
 
- 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size, VEID(ve))];
  	tb  = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
@@ -333,7 +352,7 @@ ok:
 	} else {
 		spin_unlock(&head->lock);
 		/* No definite answer... Walk to established hash table */
-		ret = __inet_check_established(death_row, sk, snum, NULL);
+		ret = __inet_check_established(death_row, sk, snum, NULL, ve);
 out:
 		local_bh_enable();
 		return ret;
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/inet_timewait_sock.c linux-2.6.16.46-0.12-027test011/net/ipv4/inet_timewait_sock.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/inet_timewait_sock.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/inet_timewait_sock.c	2007-08-28 17:35:32.000000000 +0400
@@ -14,6 +14,8 @@
 #include <net/inet_timewait_sock.h>
 #include <net/ip.h>
 
+#include <ub/ub_orphan.h>
+
 /* Must be called with locally disabled BHs. */
 void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo)
 {
@@ -32,7 +34,8 @@ void __inet_twsk_kill(struct inet_timewa
 	write_unlock(&ehead->lock);
 
 	/* Disassociate with bind bucket. */
-	bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
+	bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num,
+			hashinfo->bhash_size, tw->tw_owner_env)];
 	spin_lock(&bhead->lock);
 	tb = tw->tw_tb;
 	__hlist_del(&tw->tw_bind_node);
@@ -66,7 +69,8 @@ void __inet_twsk_hashdance(struct inet_t
 	   Note, that any socket with inet->num != 0 MUST be bound in
 	   binding cache, even if it is closed.
 	 */
-	bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
+	bhead = &hashinfo->bhash[inet_bhashfn(inet->num,
+			hashinfo->bhash_size, tw->tw_owner_env)];
 	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
 	BUG_TRAP(icsk->icsk_bind_hash);
@@ -90,9 +94,14 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance)
 
 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
 {
-	struct inet_timewait_sock *tw =
-		kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
-				 SLAB_ATOMIC);
+	struct user_beancounter *ub;
+	struct inet_timewait_sock *tw;
+
+	ub = set_exec_ub(sock_bc(sk)->ub);
+	tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
+			SLAB_ATOMIC);
+	(void)set_exec_ub(ub);
+
 	if (tw != NULL) {
 		const struct inet_sock *inet = inet_sk(sk);
 
@@ -140,6 +149,7 @@ static int inet_twdr_do_twkill_work(stru
 rescan:
 	inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
 		__inet_twsk_del_dead_node(tw);
+		ub_timewait_dec(tw, twdr);
 		spin_unlock(&twdr->death_lock);
 		__inet_twsk_kill(tw, twdr->hashinfo);
 		inet_twsk_put(tw);
@@ -238,6 +248,7 @@ void inet_twsk_deschedule(struct inet_ti
 {
 	spin_lock(&twdr->death_lock);
 	if (inet_twsk_del_dead_node(tw)) {
+		ub_timewait_dec(tw, twdr);
 		inet_twsk_put(tw);
 		if (--twdr->tw_count == 0)
 			del_timer(&twdr->tw_timer);
@@ -284,9 +295,10 @@ void inet_twsk_schedule(struct inet_time
 	spin_lock(&twdr->death_lock);
 
 	/* Unlink it, if it was scheduled */
-	if (inet_twsk_del_dead_node(tw))
+	if (inet_twsk_del_dead_node(tw)) {
+		ub_timewait_dec(tw, twdr);
 		twdr->tw_count--;
-	else
+	} else
 		atomic_inc(&tw->tw_refcnt);
 
 	if (slot >= INET_TWDR_RECYCLE_SLOTS) {
@@ -322,6 +334,7 @@ void inet_twsk_schedule(struct inet_time
 
 	hlist_add_head(&tw->tw_death_node, list);
 
+	ub_timewait_inc(tw, twdr);
 	if (twdr->tw_count++ == 0)
 		mod_timer(&twdr->tw_timer, jiffies + twdr->period);
 	spin_unlock(&twdr->death_lock);
@@ -356,6 +369,7 @@ void inet_twdr_twcal_tick(unsigned long 
 						       &twdr->twcal_row[slot]) {
 				__inet_twsk_del_dead_node(tw);
 				__inet_twsk_kill(tw, twdr->hashinfo);
+				ub_timewait_dec(tw, twdr);
 				inet_twsk_put(tw);
 				killed++;
 			}
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/ip_forward.c linux-2.6.16.46-0.12-027test011/net/ipv4/ip_forward.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/ip_forward.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/ip_forward.c	2007-08-28 17:35:32.000000000 +0400
@@ -87,6 +87,24 @@ int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto sr_failed;
 
+	/*
+	 * We try to optimize forwarding of VE packets:
+	 * do not decrement TTL (and so save skb_cow)
+	 * during forwarding of outgoing pkts from VE.
+	 * For incoming pkts we still do ttl decr,
+	 * since such skb is not cloned and does not require
+	 * actual cow. So, there is at least one place
+	 * in pkts path with mandatory ttl decr, that is
+	 * sufficient to prevent routing loops.
+	 */
+	iph = skb->nh.iph;
+	if (
+#ifdef CONFIG_IP_ROUTE_NAT
+	    (rt->rt_flags & RTCF_NAT) == 0 &&	  /* no NAT mangling expected */
+#endif						  /* and */
+	    (skb->dev->features & NETIF_F_VENET)) /* src is VENET device */
+		goto no_ttl_decr;
+
 	/* We are about to mangle packet. Copy it! */
 	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
 		goto drop;
@@ -95,6 +113,8 @@ int ip_forward(struct sk_buff *skb)
 	/* Decrease ttl after skb cow done */
 	ip_decrease_ttl(iph);
 
+no_ttl_decr:
+
 	/*
 	 *	We now generate an ICMP HOST REDIRECT giving the route
 	 *	we calculated.
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/ip_fragment.c linux-2.6.16.46-0.12-027test011/net/ipv4/ip_fragment.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/ip_fragment.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/ip_fragment.c	2007-08-28 17:35:32.000000000 +0400
@@ -97,6 +97,7 @@ struct ipq {
 	int             iif;
 	unsigned int    rid;
 	struct inet_peer *peer;
+	struct ve_struct *owner_env;
 };
 
 /* Hash table. */
@@ -182,7 +183,8 @@ static __inline__ void frag_free_queue(s
 
 static __inline__ struct ipq *frag_alloc_queue(void)
 {
-	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+	struct ipq *qp = kmalloc(sizeof(struct ipq) + sizeof(void *),
+				GFP_ATOMIC);
 
 	if(!qp)
 		return NULL;
@@ -278,6 +280,9 @@ static void ip_evictor(void)
 static void ip_expire(unsigned long arg)
 {
 	struct ipq *qp = (struct ipq *) arg;
+	struct ve_struct *envid;
+
+	envid = set_exec_env(qp->owner_env);
 
 	spin_lock(&qp->lock);
 
@@ -300,6 +305,8 @@ static void ip_expire(unsigned long arg)
 out:
 	spin_unlock(&qp->lock);
 	ipq_put(qp, NULL);
+
+	(void)set_exec_env(envid);
 }
 
 /* Creation primitives. */
@@ -325,7 +332,8 @@ static struct ipq *ip_frag_intern(struct
 		   qp->saddr == qp_in->saddr	&&
 		   qp->daddr == qp_in->daddr	&&
 		   qp->protocol == qp_in->protocol &&
-		   qp->user == qp_in->user) {
+		   qp->user == qp_in->user	&&
+		   qp->owner_env == get_exec_env()) {
 			atomic_inc(&qp->refcnt);
 			write_unlock(&ipfrag_lock);
 			qp_in->last_in |= COMPLETE;
@@ -375,6 +383,8 @@ static struct ipq *ip_frag_create(struct
 	spin_lock_init(&qp->lock);
 	atomic_set(&qp->refcnt, 1);
 
+	qp->owner_env = get_exec_env();
+
 	return ip_frag_intern(qp);
 
 out_nomem:
@@ -402,7 +412,8 @@ static inline struct ipq *ip_find(struct
 		   qp->saddr == saddr	&&
 		   qp->daddr == daddr	&&
 		   qp->protocol == protocol &&
-		   qp->user == user) {
+		   qp->user == user	&&
+		   qp->owner_env == get_exec_env()) {
 			atomic_inc(&qp->refcnt);
 			read_unlock(&ipfrag_lock);
 			return qp;
@@ -724,6 +735,9 @@ struct sk_buff *ip_defrag(struct sk_buff
 		    qp->meat == qp->len)
 			ret = ip_frag_reasm(qp, dev);
 
+		if (ret)
+			ret->owner_env = skb->owner_env;
+
 		spin_unlock(&qp->lock);
 		ipq_put(qp, NULL);
 		return ret;
@@ -734,6 +748,49 @@ struct sk_buff *ip_defrag(struct sk_buff
 	return NULL;
 }
 
+#ifdef CONFIG_VE
+/* XXX: ipq_kill() each unfinished queue whose owner_env matches @envid. */
+void ip_fragment_cleanup(struct ve_struct *envid)
+{
+	int i, progress;
+
+	/* All operations with fragment queues are performed from NET_RX/TX
+	 * soft interrupts or from timer context.  --Den */
+	local_bh_disable();
+	do {	/* loop until a pass finds no matching queue */
+		progress = 0;
+		for (i = 0; i < IPQ_HASHSZ; i++) {
+			struct ipq *qp;
+			struct hlist_node *p, *n;
+
+			if (hlist_empty(&ipq_hash[i]))	/* no lock held */
+				continue;
+inner_restart:
+			read_lock(&ipfrag_lock);
+			hlist_for_each_entry_safe(qp, p, n,
+					&ipq_hash[i], list) {
+				if (!ve_accessible_strict(qp->owner_env, envid))
+					continue;
+				atomic_inc(&qp->refcnt);	/* hold qp */
+				read_unlock(&ipfrag_lock);
+
+				spin_lock(&qp->lock);
+				if (!(qp->last_in&COMPLETE))
+					ipq_kill(qp);
+				spin_unlock(&qp->lock);
+
+				ipq_put(qp, NULL);
+				progress = 1;
+				goto inner_restart;	/* lock was dropped */
+			}
+			read_unlock(&ipfrag_lock);
+		}
+	} while(progress);
+	local_bh_enable();
+}
+EXPORT_SYMBOL(ip_fragment_cleanup);
+#endif
+
 void ipfrag_init(void)
 {
 	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/ip_input.c linux-2.6.16.46-0.12-027test011/net/ipv4/ip_input.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/ip_input.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/ip_input.c	2007-08-28 17:35:32.000000000 +0400
@@ -201,6 +201,9 @@ static inline int ip_local_deliver_finis
 {
 	int ihl = skb->nh.iph->ihl*4;
 
+	if (skb->destructor)
+		skb_orphan(skb);
+
 	__skb_pull(skb, ihl);
 
         /* Point into the IP datagram, just past the header. */
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/ip_output.c linux-2.6.16.46-0.12-027test011/net/ipv4/ip_output.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/ip_output.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/ip_output.c	2007-08-28 17:35:29.000000000 +0400
@@ -1338,12 +1338,13 @@ void ip_send_reply(struct sock *sk, stru
 		char			data[40];
 	} replyopts;
 	struct ipcm_cookie ipc;
-	u32 daddr;
+	u32 saddr, daddr;
 	struct rtable *rt = (struct rtable*)skb->dst;
 
 	if (ip_options_echo(&replyopts.opt, skb))
 		return;
 
+	saddr = skb->nh.iph->daddr;
 	daddr = ipc.addr = rt->rt_src;
 	ipc.opt = NULL;
 
@@ -1357,7 +1358,7 @@ void ip_send_reply(struct sock *sk, stru
 	{
 		struct flowi fl = { .nl_u = { .ip4_u =
 					      { .daddr = daddr,
-						.saddr = rt->rt_spec_dst,
+						.saddr = saddr,
 						.tos = RT_TOS(skb->nh.iph->tos) } },
 				    /* Not quite clean, but right. */
 				    .uli_u = { .ports =
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/ip_sockglue.c linux-2.6.16.46-0.12-027test011/net/ipv4/ip_sockglue.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/ip_sockglue.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/ip_sockglue.c	2007-08-28 17:35:32.000000000 +0400
@@ -484,7 +484,8 @@ int ip_setsockopt(struct sock *sk, int l
 				val |= inet->tos & 3;
 			}
 			if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && 
-			    !capable(CAP_NET_ADMIN)) {
+			    !capable(CAP_NET_ADMIN) &&
+			    !capable(CAP_VE_NET_ADMIN)) {
 				err = -EPERM;
 				break;
 			}
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/ipconfig.c linux-2.6.16.46-0.12-027test011/net/ipv4/ipconfig.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/ipconfig.c	2007-08-24 19:28:08.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/ipconfig.c	2007-08-28 17:35:30.000000000 +0400
@@ -186,7 +186,7 @@ static int __init ic_open_devs(void)
 	unsigned short oflags;
 
 	last = &ic_first_dev;
-	rtnl_shlock();
+	rtnl_lock();
 
 	/* bring loopback device up first */
 	if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0)
@@ -215,7 +215,7 @@ static int __init ic_open_devs(void)
 				continue;
 			}
 			if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) {
-				rtnl_shunlock();
+				rtnl_unlock();
 				return -1;
 			}
 			d->dev = dev;
@@ -232,7 +232,7 @@ static int __init ic_open_devs(void)
 				dev->name, able, d->xid));
 		}
 	}
-	rtnl_shunlock();
+	rtnl_unlock();
 
 	*last = NULL;
 
@@ -251,7 +251,7 @@ static void __init ic_close_devs(void)
 	struct ic_device *d, *next;
 	struct net_device *dev;
 
-	rtnl_shlock();
+	rtnl_lock();
 	next = ic_first_dev;
 	while ((d = next)) {
 		next = d->next;
@@ -262,7 +262,7 @@ static void __init ic_close_devs(void)
 		}
 		kfree(d);
 	}
-	rtnl_shunlock();
+	rtnl_unlock();
 }
 
 /*
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/ipmr.c linux-2.6.16.46-0.12-027test011/net/ipv4/ipmr.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/ipmr.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/ipmr.c	2007-08-28 17:35:32.000000000 +0400
@@ -837,7 +837,7 @@ static void mrtsock_destruct(struct sock
 {
 	rtnl_lock();
 	if (sk == mroute_socket) {
-		ipv4_devconf.mc_forwarding--;
+		ve_ipv4_devconf.mc_forwarding--;
 
 		write_lock_bh(&mrt_lock);
 		mroute_socket=NULL;
@@ -888,7 +888,7 @@ int ip_mroute_setsockopt(struct sock *sk
 				mroute_socket=sk;
 				write_unlock_bh(&mrt_lock);
 
-				ipv4_devconf.mc_forwarding++;
+				ve_ipv4_devconf.mc_forwarding++;
 			}
 			rtnl_unlock();
 			return ret;
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.16.46-0.12-027test011/net/ipv4/ipvs/ip_vs_conn.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/ipvs/ip_vs_conn.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/ipvs/ip_vs_conn.c	2007-08-28 17:35:30.000000000 +0400
@@ -902,7 +902,8 @@ int ip_vs_conn_init(void)
 	/* Allocate ip_vs_conn slab cache */
 	ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
 					      sizeof(struct ip_vs_conn), 0,
-					      SLAB_HWCACHE_ALIGN, NULL, NULL);
+					      SLAB_HWCACHE_ALIGN | SLAB_UBC,
+					      NULL, NULL);
 	if (!ip_vs_conn_cachep) {
 		vfree(ip_vs_conn_tab);
 		return -ENOMEM;
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/ipvs/ip_vs_core.c linux-2.6.16.46-0.12-027test011/net/ipv4/ipvs/ip_vs_core.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/ipvs/ip_vs_core.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/ipvs/ip_vs_core.c	2007-08-28 17:35:32.000000000 +0400
@@ -952,6 +952,10 @@ ip_vs_in(unsigned int hooknum, struct sk
 	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
 	 *	... don't know why 1st test DOES NOT include 2nd (?)
 	 */
+	/*
+	 * VZ: the question above is justified: the second test
+	 * (skb->dev == &loopback_dev) is superfluous.
+	 */
 	if (unlikely(skb->pkt_type != PACKET_HOST
 		     || skb->dev == &loopback_dev || skb->sk)) {
 		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/arp_tables.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/arp_tables.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/arp_tables.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/arp_tables.c	2007-08-28 17:35:36.000000000 +0400
@@ -1089,7 +1089,7 @@ int arpt_register_table(struct arpt_tabl
 	int ret;
 	struct xt_table_info *newinfo;
 	static struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+		= { 0, 0, 0, 0, { 0 }, { 0 }, { } };
 	void *loc_cpu_entry;
 
 	newinfo = xt_alloc_table_info(repl->size);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_core.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_core.c	2007-08-24 19:28:06.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_core.c	2007-08-28 17:35:33.000000000 +0400
@@ -49,6 +49,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/listhelp.h>
+#include <ub/ub_mem.h>
 
 #define IP_CONNTRACK_VERSION	"2.4"
 
@@ -60,22 +61,41 @@
 
 DEFINE_RWLOCK(ip_conntrack_lock);
 
-/* ip_conntrack_standalone needs this */
-atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_conntrack_helpers \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_helpers)
+#define ve_ip_conntrack_max \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_max)
+#define ve_ip_conntrack_count \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_count)
+#define ve_ip_conntrack_unconfirmed \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_unconfirmed)
+#else
 
 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
 LIST_HEAD(ip_conntrack_expect_list);
 struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 static LIST_HEAD(helpers);
+struct list_head *ip_conntrack_hash;
+static LIST_HEAD(unconfirmed);
+#define ve_ip_conntrack_count 		ip_conntrack_count
+#define ve_ip_conntrack_helpers		helpers
+#define ve_ip_conntrack_max 		ip_conntrack_max
+#define ve_ip_conntrack_unconfirmed 	unconfirmed
+#endif
+
+/* ip_conntrack_standalone needs this */
+atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+
 unsigned int ip_conntrack_htable_size = 0;
 int ip_conntrack_max;
-struct list_head *ip_conntrack_hash;
 static kmem_cache_t *ip_conntrack_cachep __read_mostly;
 static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
 struct ip_conntrack ip_conntrack_untracked;
 unsigned int ip_ct_log_invalid;
-static LIST_HEAD(unconfirmed);
+#ifndef CONFIG_VE_IPTABLES
 static int ip_conntrack_vmalloc;
+#endif
 
 static unsigned int ip_conntrack_next_id = 1;
 static unsigned int ip_conntrack_expect_next_id = 1;
@@ -105,6 +125,9 @@ void ip_ct_deliver_cached_events(const s
 {
 	struct ip_conntrack_ecache *ecache;
 	
+	if (!ve_is_super(get_exec_env()))
+		return;
+
 	local_bh_disable();
 	ecache = &__get_cpu_var(ip_conntrack_ecache);
 	if (ecache->ct == ct)
@@ -133,6 +156,9 @@ static void ip_ct_event_cache_flush(void
 	struct ip_conntrack_ecache *ecache;
 	int cpu;
 
+	if (!ve_is_super(get_exec_env()))
+		return;
+
 	for_each_cpu(cpu) {
 		ecache = &per_cpu(ip_conntrack_ecache, cpu);
 		if (ecache->ct)
@@ -226,7 +252,7 @@ __ip_conntrack_expect_find(const struct 
 {
 	struct ip_conntrack_expect *i;
 	
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
 		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
 			atomic_inc(&i->use);
 			return i;
@@ -255,7 +281,7 @@ find_expectation(const struct ip_conntra
 {
 	struct ip_conntrack_expect *i;
 
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
 		/* If master is not in hash table yet (ie. packet hasn't left
 		   this machine yet), how can other end know about expected?
 		   Hence these are not the droids you are looking for (if
@@ -284,7 +310,7 @@ void ip_ct_remove_expectations(struct ip
 	if (ct->expecting == 0)
 		return;
 
-	list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_safe(i, tmp, &ve_ip_conntrack_expect_list, list) {
 		if (i->master == ct && del_timer(&i->timeout)) {
 			ip_ct_unlink_expect(i);
 			ip_conntrack_expect_put(i);
@@ -302,8 +328,10 @@ clean_from_lists(struct ip_conntrack *ct
 
 	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-	LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+	LIST_DELETE(&ct->ct_owner_env->_ip_conntrack->_ip_conntrack_hash[ho],
+ 		    &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+	LIST_DELETE(&ct->ct_owner_env->_ip_conntrack->_ip_conntrack_hash[hr],
+ 		    &ct->tuplehash[IP_CT_DIR_REPLY]);
 
 	/* Destroy all pending expectations */
 	ip_ct_remove_expectations(ct);
@@ -314,7 +342,11 @@ destroy_conntrack(struct nf_conntrack *n
 {
 	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
 	struct ip_conntrack_protocol *proto;
+#ifdef CONFIG_VE_IPTABLES
+	struct ve_struct *old;
 
+	old = set_exec_env(ct->ct_owner_env);
+#endif
 	DEBUGP("destroy_conntrack(%p)\n", ct);
 	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
 	IP_NF_ASSERT(!timer_pending(&ct->timeout));
@@ -329,8 +361,8 @@ destroy_conntrack(struct nf_conntrack *n
 	if (proto && proto->destroy)
 		proto->destroy(ct);
 
-	if (ip_conntrack_destroyed)
-		ip_conntrack_destroyed(ct);
+	if (ve_ip_conntrack_destroyed)
+		ve_ip_conntrack_destroyed(ct);
 
 	write_lock_bh(&ip_conntrack_lock);
 	/* Expectations will have been removed in clean_from_lists,
@@ -353,6 +385,9 @@ destroy_conntrack(struct nf_conntrack *n
 
 	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
 	ip_conntrack_free(ct);
+#ifdef CONFIG_VE_IPTABLES
+	(void)set_exec_env(old);
+#endif
 }
 
 static void death_by_timeout(unsigned long ul_conntrack)
@@ -386,7 +421,7 @@ __ip_conntrack_find(const struct ip_conn
 	unsigned int hash = hash_conntrack(tuple);
 
 	ASSERT_READ_LOCK(&ip_conntrack_lock);
-	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
+	list_for_each_entry(h, &ve_ip_conntrack_hash[hash], list) {
 		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
 			CONNTRACK_STAT_INC(found);
 			return h;
@@ -418,9 +453,9 @@ static void __ip_conntrack_hash_insert(s
 					unsigned int repl_hash) 
 {
 	ct->id = ++ip_conntrack_next_id;
-	list_prepend(&ip_conntrack_hash[hash],
+	list_prepend(&ve_ip_conntrack_hash[hash],
 		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_prepend(&ip_conntrack_hash[repl_hash],
+	list_prepend(&ve_ip_conntrack_hash[repl_hash],
 		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
 }
 
@@ -471,11 +506,11 @@ __ip_conntrack_confirm(struct sk_buff **
 	/* See if there's one in the list already, including reverse:
            NAT could have grabbed it without realizing, since we're
            not in the hash.  If there is, we lost race. */
-	if (!LIST_FIND(&ip_conntrack_hash[hash],
+	if (!LIST_FIND(&ve_ip_conntrack_hash[hash],
 		       conntrack_tuple_cmp,
 		       struct ip_conntrack_tuple_hash *,
 		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
-	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
+	    && !LIST_FIND(&ve_ip_conntrack_hash[repl_hash],
 			  conntrack_tuple_cmp,
 			  struct ip_conntrack_tuple_hash *,
 			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
@@ -569,7 +604,7 @@ static inline int helper_cmp(const struc
 static struct ip_conntrack_helper *
 __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
 {
-	return LIST_FIND(&helpers, helper_cmp,
+	return LIST_FIND(&ve_ip_conntrack_helpers, helper_cmp,
 			 struct ip_conntrack_helper *,
 			 tuple);
 }
@@ -605,7 +640,7 @@ void ip_conntrack_helper_put(struct ip_c
 struct ip_conntrack_protocol *
 __ip_conntrack_proto_find(u_int8_t protocol)
 {
-	return ip_ct_protos[protocol];
+	return ve_ip_ct_protos[protocol];
 }
 
 /* this is guaranteed to always return a valid protocol helper, since
@@ -632,29 +667,32 @@ void ip_conntrack_proto_put(struct ip_co
 }
 
 struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
-					struct ip_conntrack_tuple *repl)
+		struct ip_conntrack_tuple *repl, struct user_beancounter *ub)
 {
 	struct ip_conntrack *conntrack;
+	struct user_beancounter *old_ub;
 
 	if (!ip_conntrack_hash_rnd_initted) {
 		get_random_bytes(&ip_conntrack_hash_rnd, 4);
 		ip_conntrack_hash_rnd_initted = 1;
 	}
 
-	if (ip_conntrack_max
-	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+	if (ve_ip_conntrack_max
+	    && atomic_read(&ve_ip_conntrack_count) >= ve_ip_conntrack_max) {
 		unsigned int hash = hash_conntrack(orig);
 		/* Try dropping from this hash chain. */
-		if (!early_drop(&ip_conntrack_hash[hash])) {
+		if (!early_drop(&ve_ip_conntrack_hash[hash])) {
 			if (net_ratelimit())
-				printk(KERN_WARNING
-				       "ip_conntrack: table full, dropping"
-				       " packet.\n");
+				ve_printk(VE_LOG_BOTH, KERN_WARNING
+				       "ip_conntrack: VE %d: table full, dropping"
+				       " packet.\n", VEID(get_exec_env()));
 			return ERR_PTR(-ENOMEM);
 		}
 	}
 
+	old_ub = set_exec_ub(ub);
 	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
+	(void)set_exec_ub(old_ub);
 	if (!conntrack) {
 		DEBUGP("Can't allocate conntrack.\n");
 		return ERR_PTR(-ENOMEM);
@@ -669,8 +707,11 @@ struct ip_conntrack *ip_conntrack_alloc(
 	init_timer(&conntrack->timeout);
 	conntrack->timeout.data = (unsigned long)conntrack;
 	conntrack->timeout.function = death_by_timeout;
+#ifdef CONFIG_VE_IPTABLES
+	conntrack->ct_owner_env = get_exec_env();
+#endif
 
-	atomic_inc(&ip_conntrack_count);
+	atomic_inc(&ve_ip_conntrack_count);
 
 	return conntrack;
 }
@@ -678,7 +719,7 @@ struct ip_conntrack *ip_conntrack_alloc(
 void
 ip_conntrack_free(struct ip_conntrack *conntrack)
 {
-	atomic_dec(&ip_conntrack_count);
+	atomic_dec(&ve_ip_conntrack_count);
 	kmem_cache_free(ip_conntrack_cachep, conntrack);
 }
 
@@ -692,13 +733,22 @@ init_conntrack(struct ip_conntrack_tuple
 	struct ip_conntrack *conntrack;
 	struct ip_conntrack_tuple repl_tuple;
 	struct ip_conntrack_expect *exp;
+	struct user_beancounter *ub;
 
 	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
 		DEBUGP("Can't invert tuple.\n");
 		return NULL;
 	}
 
-	conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
+#ifdef CONFIG_USER_RESOURCE
+	if (skb->dev != NULL)  /* received skb */
+		ub = netdev_bc(skb->dev)->exec_ub;
+	else if (skb->sk != NULL) /* sent skb */
+		ub = sock_bc(skb->sk)->ub;
+	else
+#endif
+		ub = NULL;
+	conntrack = ip_conntrack_alloc(tuple, &repl_tuple, ub);
 	if (conntrack == NULL || IS_ERR(conntrack))
 		return (struct ip_conntrack_tuple_hash *)conntrack;
 
@@ -733,7 +783,8 @@ init_conntrack(struct ip_conntrack_tuple
 	}
 
 	/* Overload tuple linked list to put us in unconfirmed list. */
-	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
+	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list,
+			&ve_ip_conntrack_unconfirmed);
 
 	write_unlock_bh(&ip_conntrack_lock);
 
@@ -925,7 +976,7 @@ void ip_conntrack_unexpect_related(struc
 
 	write_lock_bh(&ip_conntrack_lock);
 	/* choose the the oldest expectation to evict */
-	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) {
 		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
 			ip_ct_unlink_expect(i);
 			write_unlock_bh(&ip_conntrack_lock);
@@ -959,11 +1010,11 @@ void ip_conntrack_expect_put(struct ip_c
 		kmem_cache_free(ip_conntrack_expect_cachep, exp);
 }
 
-static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
+void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
 {
 	atomic_inc(&exp->use);
 	exp->master->expecting++;
-	list_add(&exp->list, &ip_conntrack_expect_list);
+	list_add(&exp->list, &ve_ip_conntrack_expect_list);
 
 	init_timer(&exp->timeout);
 	exp->timeout.data = (unsigned long)exp;
@@ -975,13 +1026,14 @@ static void ip_conntrack_expect_insert(s
 	atomic_inc(&exp->use);
 	CONNTRACK_STAT_INC(expect_create);
 }
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_insert);
 
 /* Race with expectations being used means we could have none to find; OK. */
 static void evict_oldest_expect(struct ip_conntrack *master)
 {
 	struct ip_conntrack_expect *i;
 
-	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) {
 		if (i->master == master) {
 			if (del_timer(&i->timeout)) {
 				ip_ct_unlink_expect(i);
@@ -1012,7 +1064,7 @@ int ip_conntrack_expect_related(struct i
 	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);
 
 	write_lock_bh(&ip_conntrack_lock);
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+	list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
 		if (expect_matches(i, expect)) {
 			/* Refresh timer: if it's dying, ignore.. */
 			if (refresh_timer(i)) {
@@ -1060,18 +1112,48 @@ int ip_conntrack_helper_register(struct 
 {
 	BUG_ON(me->timeout == 0);
 	write_lock_bh(&ip_conntrack_lock);
-	list_prepend(&helpers, me);
+	list_prepend(&ve_ip_conntrack_helpers, me);
 	write_unlock_bh(&ip_conntrack_lock);
 
 	return 0;
 }
 
+int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *me)
+{
+	int ret;
+	struct module *mod = me->me;
+	/* In a non-super VE, register a private kmalloc()ed copy of @me. */
+	if (!ve_is_super(get_exec_env())) {
+		struct ip_conntrack_helper *tmp;
+		__module_get(mod);	/* dropped on failure below */
+		ret = -ENOMEM;
+		tmp = kmalloc(sizeof(struct ip_conntrack_helper), GFP_KERNEL);
+		if (!tmp)
+			goto nomem;
+		memcpy(tmp, me, sizeof(struct ip_conntrack_helper));
+		me = tmp;	/* from here on @me is the copy */
+	}
+
+	ret = ip_conntrack_helper_register(me);
+	if (ret)
+		goto out;
+
+	return 0;
+out:
+	if (!ve_is_super(get_exec_env())){
+		kfree(me);	/* @me is the copy made above */
+nomem:		/* no copy to free, only the module ref */
+		module_put(mod);
+	}
+	return ret;
+}
+
 struct ip_conntrack_helper *
 __ip_conntrack_helper_find_byname(const char *name)
 {
 	struct ip_conntrack_helper *h;
 
-	list_for_each_entry(h, &helpers, list) {
+	list_for_each_entry(h, &ve_ip_conntrack_helpers, list) {
 		if (!strcmp(h->name, name))
 			return h;
 	}
@@ -1096,19 +1178,20 @@ void ip_conntrack_helper_unregister(stru
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&ip_conntrack_lock);
-	LIST_DELETE(&helpers, me);
+	LIST_DELETE(&ve_ip_conntrack_helpers, me);
 
 	/* Get rid of expectations */
-	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
+	list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list, list) {
 		if (exp->master->helper == me && del_timer(&exp->timeout)) {
 			ip_ct_unlink_expect(exp);
 			ip_conntrack_expect_put(exp);
 		}
 	}
 	/* Get rid of expecteds, set helpers to NULL. */
-	LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
+	LIST_FIND_W(&ve_ip_conntrack_unconfirmed, unhelp,
+			struct ip_conntrack_tuple_hash*, me);
 	for (i = 0; i < ip_conntrack_htable_size; i++)
-		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
+		LIST_FIND_W(&ve_ip_conntrack_hash[i], unhelp,
 			    struct ip_conntrack_tuple_hash *, me);
 	write_unlock_bh(&ip_conntrack_lock);
 
@@ -1116,6 +1199,25 @@ void ip_conntrack_helper_unregister(stru
 	synchronize_net();
 }
 
+void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
+{
+	/* Non-super VE: operate on the list entry that carries @me's name. */
+	if (!ve_is_super(get_exec_env())) {
+		read_lock_bh(&ip_conntrack_lock);
+		me = list_named_find(&ve_ip_conntrack_helpers, me->name);
+		read_unlock_bh(&ip_conntrack_lock);
+		if (!me)
+			return;	/* no helper of that name in the list */
+	}
+
+	ip_conntrack_helper_unregister(me);
+
+	if (!ve_is_super(get_exec_env())) {
+		module_put(me->me);	/* ref from register time */
+		kfree(me);	/* the kmalloc()ed copy */
+	}
+}
+
 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
 void __ip_ct_refresh_acct(struct ip_conntrack *ct, 
 		        enum ip_conntrack_info ctinfo,
@@ -1246,13 +1348,13 @@ get_next_corpse(int (*iter)(struct ip_co
 
 	write_lock_bh(&ip_conntrack_lock);
 	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
-		h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
+		h = LIST_FIND_W(&ve_ip_conntrack_hash[*bucket], do_iter,
 				struct ip_conntrack_tuple_hash *, iter, data);
 		if (h)
 			break;
 	}
 	if (!h)
-		h = LIST_FIND_W(&unconfirmed, do_iter,
+		h = LIST_FIND_W(&ve_ip_conntrack_unconfirmed, do_iter,
 				struct ip_conntrack_tuple_hash *, iter, data);
 	if (h)
 		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
@@ -1289,6 +1391,11 @@ getorigdst(struct sock *sk, int optval, 
 	struct ip_conntrack_tuple_hash *h;
 	struct ip_conntrack_tuple tuple;
 	
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_ip_conntrack)
+		return -ENOPROTOOPT;
+#endif
+
 	IP_CT_TUPLE_U_BLANK(&tuple);
 	tuple.src.ip = inet->rcv_saddr;
 	tuple.src.u.tcp.port = inet->sport;
@@ -1360,12 +1467,17 @@ static void free_conntrack_hash(struct l
 			   get_order(sizeof(struct list_head) * size));
 }
 
+static void ip_conntrack_cache_free(void)
+{	/* tears down what ip_conntrack_cache_create() set up */
+	kmem_cache_destroy(ip_conntrack_expect_cachep);
+	kmem_cache_destroy(ip_conntrack_cachep);
+	nf_unregister_sockopt(&so_getorigdst);
+}
+
 /* Mishearing the voices in his head, our hero wonders how he's
    supposed to kill the mall. */
 void ip_conntrack_cleanup(void)
 {
-	ip_ct_attach = NULL;
-
 	/* This makes sure all current packets have passed through
            netfilter framework.  Roll on, two-stage module
            delete... */
@@ -1374,19 +1486,32 @@ void ip_conntrack_cleanup(void)
 	ip_ct_event_cache_flush();
  i_see_dead_people:
 	ip_conntrack_flush();
-	if (atomic_read(&ip_conntrack_count) != 0) {
+	if (atomic_read(&ve_ip_conntrack_count) != 0) {
 		schedule();
 		goto i_see_dead_people;
 	}
-	/* wait until all references to ip_conntrack_untracked are dropped */
-	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
-		schedule();
-
-	kmem_cache_destroy(ip_conntrack_cachep);
-	kmem_cache_destroy(ip_conntrack_expect_cachep);
-	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+	if (ve_is_super(get_exec_env())) {
+		/* wait until all references to ip_conntrack_untracked are
+		 * dropped */
+		while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
+			schedule();
+		ip_ct_attach = NULL;
+		ip_conntrack_cache_free();
+	}
+	free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc,
 			    ip_conntrack_htable_size);
-	nf_unregister_sockopt(&so_getorigdst);
+	ve_ip_conntrack_hash = NULL;
+	INIT_LIST_HEAD(&ve_ip_conntrack_unconfirmed);
+	INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
+	INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
+	atomic_set(&ve_ip_conntrack_count, 0);
+	ve_ip_conntrack_max = 0;
+#ifdef CONFIG_VE_IPTABLES
+	kfree(ve_ip_ct_protos);
+	ve_ip_ct_protos = NULL;
+	kfree(get_exec_env()->_ip_conntrack);
+	get_exec_env()->_ip_conntrack = NULL;
+#endif
 }
 
 static struct list_head *alloc_hashtable(int size, int *vmalloced)
@@ -1395,13 +1520,13 @@ static struct list_head *alloc_hashtable
 	unsigned int i;
 
 	*vmalloced = 0; 
-	hash = (void*)__get_free_pages(GFP_KERNEL, 
+	hash = (void*)__get_free_pages(GFP_KERNEL_UBC,
 				       get_order(sizeof(struct list_head)
 						 * size));
 	if (!hash) { 
 		*vmalloced = 1;
 		printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
-		hash = vmalloc(sizeof(struct list_head) * size);
+		hash = ub_vmalloc(sizeof(struct list_head) * size);
 	}
 
 	if (hash)
@@ -1437,8 +1562,8 @@ static int set_hashsize(const char *val,
 
 	write_lock_bh(&ip_conntrack_lock);
 	for (i = 0; i < ip_conntrack_htable_size; i++) {
-		while (!list_empty(&ip_conntrack_hash[i])) {
-			h = list_entry(ip_conntrack_hash[i].next,
+		while (!list_empty(&ve_ip_conntrack_hash[i])) {
+			h = list_entry(ve_ip_conntrack_hash[i].next,
 				       struct ip_conntrack_tuple_hash, list);
 			list_del(&h->list);
 			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
@@ -1446,12 +1571,12 @@ static int set_hashsize(const char *val,
 		}
 	}
 	old_size = ip_conntrack_htable_size;
-	old_vmalloced = ip_conntrack_vmalloc;
-	old_hash = ip_conntrack_hash;
+	old_vmalloced = ve_ip_conntrack_vmalloc;
+	old_hash = ve_ip_conntrack_hash;
 
 	ip_conntrack_htable_size = hashsize;
-	ip_conntrack_vmalloc = vmalloced;
-	ip_conntrack_hash = hash;
+	ve_ip_conntrack_vmalloc = vmalloced;
+	ve_ip_conntrack_hash = hash;
 	ip_conntrack_hash_rnd = rnd;
 	write_unlock_bh(&ip_conntrack_lock);
 
@@ -1462,9 +1587,8 @@ static int set_hashsize(const char *val,
 module_param_call(hashsize, set_hashsize, param_get_uint,
 		  &ip_conntrack_htable_size, 0600);
 
-int __init ip_conntrack_init(void)
+static int ip_conntrack_cache_create(void)
 {
-	unsigned int i;
 	int ret;
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
@@ -1478,70 +1602,127 @@ int __init ip_conntrack_init(void)
 		if (ip_conntrack_htable_size < 16)
 			ip_conntrack_htable_size = 16;
 	}
-	ip_conntrack_max = 8 * ip_conntrack_htable_size;
+	ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
 
 	printk("ip_conntrack version %s (%u buckets, %d max)"
 	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
-	       ip_conntrack_htable_size, ip_conntrack_max,
+	       ip_conntrack_htable_size, ve_ip_conntrack_max,
 	       sizeof(struct ip_conntrack));
 
 	ret = nf_register_sockopt(&so_getorigdst);
 	if (ret != 0) {
 		printk(KERN_ERR "Unable to register netfilter socket option\n");
-		return ret;
-	}
-
-	ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
-					    &ip_conntrack_vmalloc);
-	if (!ip_conntrack_hash) {
-		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
-		goto err_unreg_sockopt;
+		goto out_sockopt;
 	}
 
+	ret = -ENOMEM;
 	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
 	                                        sizeof(struct ip_conntrack), 0,
-	                                        0, NULL, NULL);
+	                                        SLAB_UBC, NULL, NULL);
 	if (!ip_conntrack_cachep) {
 		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
-		goto err_free_hash;
+		goto err_unreg_sockopt;
 	}
 
 	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
 					sizeof(struct ip_conntrack_expect),
-					0, 0, NULL, NULL);
+					0, SLAB_UBC, NULL, NULL);
 	if (!ip_conntrack_expect_cachep) {
 		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
 		goto err_free_conntrack_slab;
 	}
 
+	return 0;
+
+err_free_conntrack_slab:
+	kmem_cache_destroy(ip_conntrack_cachep);
+err_unreg_sockopt:
+	nf_unregister_sockopt(&so_getorigdst);
+out_sockopt:
+	return ret;
+}
+
+int ip_conntrack_init(void)
+{
+	struct ve_struct *env;
+	unsigned int i;
+	int ret;
+
+	env = get_exec_env();
+#ifdef CONFIG_VE_IPTABLES
+	ret = -ENOMEM;
+	env->_ip_conntrack =
+		kmalloc(sizeof(struct ve_ip_conntrack), GFP_KERNEL);
+	if (!env->_ip_conntrack)
+		goto out;
+	memset(env->_ip_conntrack, 0, sizeof(struct ve_ip_conntrack));
+	if (ve_is_super(env)) {
+		ret = ip_conntrack_cache_create();
+		if (ret)
+			goto cache_fail;
+	} else
+		ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
+#else /* CONFIG_VE_IPTABLES */
+	ret = ip_conntrack_cache_create();
+	if (ret)
+		goto out;
+#endif
+
+	ret = -ENOMEM;
+	ve_ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
+					    &ve_ip_conntrack_vmalloc);
+	if (!ve_ip_conntrack_hash) {
+		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
+		goto err_free_cache;
+	}
+
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_ct_protos = (struct ip_conntrack_protocol **)
+		ub_kmalloc(sizeof(void *)*MAX_IP_CT_PROTO, GFP_KERNEL);
+	if (!ve_ip_ct_protos)
+		goto err_free_hash;
+#endif
 	/* Don't NEED lock here, but good form anyway. */
 	write_lock_bh(&ip_conntrack_lock);
 	for (i = 0; i < MAX_IP_CT_PROTO; i++)
-		ip_ct_protos[i] = &ip_conntrack_generic_protocol;
+		ve_ip_ct_protos[i] = &ip_conntrack_generic_protocol;
 	/* Sew in builtin protocols. */
-	ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
-	ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
-	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
+	ve_ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
+	ve_ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
+	ve_ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
 	write_unlock_bh(&ip_conntrack_lock);
 
-	/* For use by ipt_REJECT */
-	ip_ct_attach = ip_conntrack_attach;
-
-	/* Set up fake conntrack:
-	    - to never be deleted, not in any hashes */
-	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
-	/*  - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
+	INIT_LIST_HEAD(&ve_ip_conntrack_unconfirmed);
+	INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
+	INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
+
+	if (ve_is_super(env)) {
+		/* For use by ipt_REJECT */
+		ip_ct_attach = ip_conntrack_attach;
+
+		/* Set up fake conntrack:
+		    - to never be deleted, not in any hashes */
+		atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
+		/*  - and look it like as a confirmed connection */
+		set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
+	}
 
-	return ret;
+	return 0;
 
-err_free_conntrack_slab:
-	kmem_cache_destroy(ip_conntrack_cachep);
+#ifdef CONFIG_VE_IPTABLES
 err_free_hash:
-	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+#endif
+	free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc,
 			    ip_conntrack_htable_size);
-err_unreg_sockopt:
-	nf_unregister_sockopt(&so_getorigdst);
-
-	return -ENOMEM;
+	ve_ip_conntrack_hash = NULL;
+err_free_cache:
+	if (ve_is_super(env))
+		ip_conntrack_cache_free();
+#ifdef CONFIG_VE_IPTABLES
+cache_fail:
+	kfree(env->_ip_conntrack);
+	env->_ip_conntrack = NULL;
+#endif
+out:
+	return ret;
 }
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_ftp.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_ftp.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_ftp.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_ftp.c	2007-08-28 17:35:32.000000000 +0400
@@ -15,6 +15,7 @@
 #include <linux/ctype.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
@@ -425,8 +426,8 @@ static int help(struct sk_buff **pskb,
 
 	/* Now, NAT might want to mangle the packet, and register the
 	 * (possibly changed) expectation itself. */
-	if (ip_nat_ftp_hook)
-		ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
+	if (ve_ip_nat_ftp_hook)
+		ret = ve_ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
 				      matchoff, matchlen, exp, &seq);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
@@ -452,16 +453,39 @@ out_update_nl:
 static struct ip_conntrack_helper ftp[MAX_PORTS];
 static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
 
-/* Not __exit: called from init() */
-static void fini(void)
+void fini_iptable_ftp(void)
 {
 	int i;
 	for (i = 0; i < ports_c; i++) {
 		DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
 				ports[i]);
-		ip_conntrack_helper_unregister(&ftp[i]);
+		virt_ip_conntrack_helper_unregister(&ftp[i]);
 	}
+}
+
+int init_iptable_ftp(void)
+{
+	int port, err = 0;
 
+	/* Stop at the first registration failure and roll everything back. */
+	for (port = 0; port < ports_c && !err; port++) {
+		DEBUGP("ip_ct_ftp: registering helper for port %d\n",
+				ports[port]);
+		err = virt_ip_conntrack_helper_register(&ftp[port]);
+	}
+
+	if (err)
+		fini_iptable_ftp();
+	return err;
+}
+
+/* Not __exit: called from init() */
+static void fini(void)
+{
+	KSYMMODUNRESOLVE(ip_conntrack_ftp);
+	KSYMUNRESOLVE(init_iptable_ftp);
+	KSYMUNRESOLVE(fini_iptable_ftp);
+	fini_iptable_ftp();
 	kfree(ftp_buffer);
 }
 
@@ -496,13 +520,17 @@ static int __init init(void)
 
 		DEBUGP("ip_ct_ftp: registering helper for port %d\n", 
 				ports[i]);
-		ret = ip_conntrack_helper_register(&ftp[i]);
+		ret = virt_ip_conntrack_helper_register(&ftp[i]);
 
 		if (ret) {
 			fini();
 			return ret;
 		}
 	}
+
+	KSYMRESOLVE(init_iptable_ftp);
+	KSYMRESOLVE(fini_iptable_ftp);
+	KSYMMODRESOLVE(ip_conntrack_ftp);
 	return 0;
 }
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_irc.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_irc.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_irc.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_irc.c	2007-08-28 17:35:32.000000000 +0400
@@ -28,6 +28,7 @@
 #include <linux/ip.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
@@ -244,6 +245,33 @@ static char irc_names[MAX_PORTS][sizeof(
 
 static void fini(void);
 
+/* Unregister the conntrack helper of every configured IRC port. */
+void fini_iptable_irc(void)
+{
+	int port;
+	for (port = 0; port < ports_c; port++) {
+		DEBUGP("unregistering port %d\n", ports[port]);
+		virt_ip_conntrack_helper_unregister(&irc_helpers[port]);
+	}
+}
+
+/* Register all IRC port helpers; any failure is reported as -EBUSY. */
+int init_iptable_irc(void)
+{
+	int n;
+
+	for (n = 0; n < ports_c; n++) {
+		DEBUGP("port #%d: %d\n", n, ports[n]);
+		if (!virt_ip_conntrack_helper_register(&irc_helpers[n]))
+			continue;
+		printk("ip_conntrack_irc: ERROR registering port %d\n",
+			ports[n]);
+		fini_iptable_irc();
+		return -EBUSY;
+	}
+	return 0;
+}
+
 static int __init init(void)
 {
 	int i, ret;
@@ -283,7 +311,7 @@ static int __init init(void)
 
 		DEBUGP("port #%d: %d\n", i, ports[i]);
 
-		ret = ip_conntrack_helper_register(hlpr);
+		ret = virt_ip_conntrack_helper_register(hlpr);
 
 		if (ret) {
 			printk("ip_conntrack_irc: ERROR registering port %d\n",
@@ -292,6 +320,10 @@ static int __init init(void)
 			return -EBUSY;
 		}
 	}
+
+	KSYMRESOLVE(init_iptable_irc);
+	KSYMRESOLVE(fini_iptable_irc);
+	KSYMMODRESOLVE(ip_conntrack_irc);
 	return 0;
 }
 
@@ -299,12 +331,10 @@ static int __init init(void)
  * it is needed by the init function */
 static void fini(void)
 {
-	int i;
-	for (i = 0; i < ports_c; i++) {
-		DEBUGP("unregistering port %d\n",
-		       ports[i]);
-		ip_conntrack_helper_unregister(&irc_helpers[i]);
-	}
+	KSYMMODUNRESOLVE(ip_conntrack_irc);
+	KSYMUNRESOLVE(init_iptable_irc);
+	KSYMUNRESOLVE(fini_iptable_irc);
+	fini_iptable_irc();
 	kfree(irc_buffer);
 }
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_netlink.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_netlink.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_netlink.c	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_netlink.c	2007-08-28 17:35:32.000000000 +0400
@@ -29,6 +29,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
+#include <net/sock.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
@@ -39,6 +40,8 @@
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
+#include <ub/beancounter.h>
+#include <ub/ub_sk.h>
 
 MODULE_LICENSE("GPL");
 
@@ -403,7 +406,7 @@ ctnetlink_dump_table(struct sk_buff *skb
 
 	read_lock_bh(&ip_conntrack_lock);
 	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
-		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
+		list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) {
 			h = (struct ip_conntrack_tuple_hash *) i;
 			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
@@ -440,7 +443,7 @@ ctnetlink_dump_table_w(struct sk_buff *s
 
 	write_lock_bh(&ip_conntrack_lock);
 	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
-		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
+		list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) {
 			h = (struct ip_conntrack_tuple_hash *) i;
 			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
@@ -1003,14 +1006,15 @@ ctnetlink_change_conntrack(struct ip_con
 static int
 ctnetlink_create_conntrack(struct nfattr *cda[], 
 			   struct ip_conntrack_tuple *otuple,
-			   struct ip_conntrack_tuple *rtuple)
+			   struct ip_conntrack_tuple *rtuple,
+			   struct user_beancounter *ub)
 {
 	struct ip_conntrack *ct;
 	int err = -EINVAL;
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
-	ct = ip_conntrack_alloc(otuple, rtuple);
+	ct = ip_conntrack_alloc(otuple, rtuple, ub);
 	if (ct == NULL || IS_ERR(ct))
 		return -ENOMEM;	
 
@@ -1089,8 +1093,16 @@ ctnetlink_new_conntrack(struct sock *ctn
 		write_unlock_bh(&ip_conntrack_lock);
 		DEBUGP("no such conntrack, create new\n");
 		err = -ENOENT;
-		if (nlh->nlmsg_flags & NLM_F_CREATE)
-			err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
+		if (nlh->nlmsg_flags & NLM_F_CREATE) {
+#ifdef CONFIG_USER_RESOURCE
+			if (skb->sk)
+				err = ctnetlink_create_conntrack(cda, &otuple,
+						&rtuple, sock_bc(skb->sk)->ub);
+			else
+#endif
+				err = ctnetlink_create_conntrack(cda,
+						&otuple, &rtuple, NULL);
+		}
 		return err;
 	}
 	/* implicit 'else' */
@@ -1251,7 +1263,7 @@ ctnetlink_exp_dump_table(struct sk_buff 
 	DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id);
 
 	read_lock_bh(&ip_conntrack_lock);
-	list_for_each_prev(i, &ip_conntrack_expect_list) {
+	list_for_each_prev(i, &ve_ip_conntrack_expect_list) {
 		exp = (struct ip_conntrack_expect *) i;
 		if (exp->id <= *id)
 			continue;
@@ -1397,7 +1409,7 @@ ctnetlink_del_expect(struct sock *ctnl, 
 			write_unlock_bh(&ip_conntrack_lock);
 			return -EINVAL;
 		}
-		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
+		list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list,
 					 list) {
 			if (exp->master->helper == h 
 			    && del_timer(&exp->timeout)) {
@@ -1409,7 +1421,7 @@ ctnetlink_del_expect(struct sock *ctnl, 
 	} else {
 		/* This basically means we have to flush everything*/
 		write_lock_bh(&ip_conntrack_lock);
-		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
+		list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list,
 					 list) {
 			if (del_timer(&exp->timeout)) {
 				ip_ct_unlink_expect(exp);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_proto_generic.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_proto_generic.c	2007-08-28 17:35:32.000000000 +0400
@@ -52,7 +52,7 @@ static int packet(struct ip_conntrack *c
 		  const struct sk_buff *skb,
 		  enum ip_conntrack_info ctinfo)
 {
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
+	ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_generic_timeout);
 	return NF_ACCEPT;
 }
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_proto_icmp.c	2007-08-28 17:35:32.000000000 +0400
@@ -104,7 +104,7 @@ static int icmp_packet(struct ip_conntra
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
 		ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
-		ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
+		ip_ct_refresh_acct(ct, ctinfo, skb, ve_ip_ct_icmp_timeout);
 	}
 
 	return NF_ACCEPT;
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_proto_sctp.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_proto_sctp.c	2007-08-24 19:28:06.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_proto_sctp.c	2007-08-28 17:35:30.000000000 +0400
@@ -461,7 +461,8 @@ static int sctp_new(struct ip_conntrack 
 						SCTP_CONNTRACK_NONE, sch->type);
 
 		/* Invalid: delete conntrack */
-		if (newconntrack == SCTP_CONNTRACK_MAX) {
+		if (newconntrack == SCTP_CONNTRACK_NONE ||
+		    newconntrack == SCTP_CONNTRACK_MAX) {
 			DEBUGP("ip_conntrack_sctp: invalid new deleting.\n");
 			return 0;
 		}
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2007-08-28 17:35:32.000000000 +0400
@@ -98,7 +98,7 @@ unsigned int ip_ct_tcp_timeout_close =  
    to ~13-30min depending on RTO. */
 unsigned int ip_ct_tcp_timeout_max_retrans =     5 MINS;
  
-static const unsigned int * tcp_timeouts[]
+const unsigned int * tcp_timeouts[]
 = { NULL,                              /*      TCP_CONNTRACK_NONE */
     &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
     &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
@@ -762,7 +762,7 @@ static int tcp_in_window(struct ip_ct_tc
 			: "SEQ is under the lower bound (already ACKed data retransmitted)"
 			: "SEQ is over the upper bound (over the window of the receiver)");
 
-		res = ip_ct_tcp_be_liberal;
+		res = ve_ip_ct_tcp_be_liberal;
   	}
   
 	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
@@ -1035,9 +1035,11 @@ static int tcp_packet(struct ip_conntrac
 	    && (new_state == TCP_CONNTRACK_FIN_WAIT
 	    	|| new_state == TCP_CONNTRACK_CLOSE))
 		conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
-	timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
-		  && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
-		  ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+	timeout = conntrack->proto.tcp.retrans >= ve_ip_ct_tcp_max_retrans &&
+		ve_ip_ct_tcp_timeouts[new_state] >
+					ve_ip_ct_tcp_timeout_max_retrans
+		? ve_ip_ct_tcp_timeout_max_retrans :
+					ve_ip_ct_tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
 	ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
@@ -1112,7 +1114,7 @@ static int tcp_new(struct ip_conntrack *
 		conntrack->proto.tcp.seen[1].flags = 0;
 		conntrack->proto.tcp.seen[0].loose = 
 		conntrack->proto.tcp.seen[1].loose = 0;
-	} else if (ip_ct_tcp_loose == 0) {
+	} else if (ve_ip_ct_tcp_loose == 0) {
 		/* Don't try to pick up connections. */
 		return 0;
 	} else {
@@ -1136,7 +1138,7 @@ static int tcp_new(struct ip_conntrack *
 		conntrack->proto.tcp.seen[0].flags =
 		conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
 		conntrack->proto.tcp.seen[0].loose = 
-		conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
+		conntrack->proto.tcp.seen[1].loose = ve_ip_ct_tcp_loose;
 	}
     
 	conntrack->proto.tcp.seen[1].td_end = 0;
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_proto_udp.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_proto_udp.c	2007-08-28 17:35:32.000000000 +0400
@@ -71,12 +71,12 @@ static int udp_packet(struct ip_conntrac
 	   stream.  Extend timeout. */
 	if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
 		ip_ct_refresh_acct(conntrack, ctinfo, skb, 
-				   ip_ct_udp_timeout_stream);
+				   ve_ip_ct_udp_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
 		if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
 			ip_conntrack_event_cache(IPCT_STATUS, skb);
 	} else
-		ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
+		ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_udp_timeout);
 
 	return NF_ACCEPT;
 }
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_standalone.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_conntrack_standalone.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_conntrack_standalone.c	2007-08-28 17:35:32.000000000 +0400
@@ -28,6 +28,7 @@
 #include <net/checksum.h>
 #include <net/ip.h>
 #include <net/route.h>
+#include <linux/nfcalls.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
@@ -46,9 +47,31 @@
 
 MODULE_LICENSE("GPL");
 
+int ip_conntrack_disable_ve0 = 0;
+module_param(ip_conntrack_disable_ve0, int, 0440);
+
 extern atomic_t ip_conntrack_count;
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_ip_conntrack_count \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_count)
+#else
+#define ve_ip_conntrack_count	ip_conntrack_count
+#endif
 DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
 
+/* Obsolete ip_conntrack_enable_ve0 param: setting it only logs a notice. */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+	printk(KERN_INFO KBUILD_MODNAME
+	       ": parameter ip_conntrack_enable_ve0 is obsoleted. In ovzkernel"
+	       " >= 2.6.15 connection tracking on hardware node is enabled by "
+	       "default, use ip_conntrack_disable_ve0=1 parameter to "
+	       "disable.\n");
+	return 0;
+}
+module_param_call(ip_conntrack_enable_ve0, warn_set, NULL, NULL, 0);
+
 static int kill_proto(struct ip_conntrack *i, void *data)
 {
 	return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == 
@@ -89,8 +112,8 @@ static struct list_head *ct_get_first(st
 	for (st->bucket = 0;
 	     st->bucket < ip_conntrack_htable_size;
 	     st->bucket++) {
-		if (!list_empty(&ip_conntrack_hash[st->bucket]))
-			return ip_conntrack_hash[st->bucket].next;
+		if (!list_empty(&ve_ip_conntrack_hash[st->bucket]))
+			return ve_ip_conntrack_hash[st->bucket].next;
 	}
 	return NULL;
 }
@@ -100,10 +123,10 @@ static struct list_head *ct_get_next(str
 	struct ct_iter_state *st = seq->private;
 
 	head = head->next;
-	while (head == &ip_conntrack_hash[st->bucket]) {
+	while (head == &ve_ip_conntrack_hash[st->bucket]) {
 		if (++st->bucket >= ip_conntrack_htable_size)
 			return NULL;
-		head = ip_conntrack_hash[st->bucket].next;
+		head = ve_ip_conntrack_hash[st->bucket].next;
 	}
 	return head;
 }
@@ -234,7 +257,7 @@ static struct file_operations ct_file_op
 /* expects */
 static void *exp_seq_start(struct seq_file *s, loff_t *pos)
 {
-	struct list_head *e = &ip_conntrack_expect_list;
+	struct list_head *e = &ve_ip_conntrack_expect_list;
 	loff_t i;
 
 	/* strange seq_file api calls stop even if we fail,
@@ -246,7 +269,7 @@ static void *exp_seq_start(struct seq_fi
 
 	for (i = 0; i <= *pos; i++) {
 		e = e->next;
-		if (e == &ip_conntrack_expect_list)
+		if (e == &ve_ip_conntrack_expect_list)
 			return NULL;
 	}
 	return e;
@@ -259,7 +282,7 @@ static void *exp_seq_next(struct seq_fil
 	++*pos;
 	e = e->next;
 
-	if (e == &ip_conntrack_expect_list)
+	if (e == &ve_ip_conntrack_expect_list)
 		return NULL;
 
 	return e;
@@ -344,7 +367,7 @@ static void ct_cpu_seq_stop(struct seq_f
 
 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 {
-	unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
+	unsigned int nr_conntracks = atomic_read(&ve_ip_conntrack_count);
 	struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
@@ -541,6 +564,28 @@ static struct nf_hook_ops ip_conntrack_l
 
 /* From ip_conntrack_core.c */
 extern int ip_conntrack_max;
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_conntrack_max \
+	(get_exec_env()->_ip_conntrack->_ip_conntrack_max)
+#define ve_ip_ct_sysctl_header \
+	(get_exec_env()->_ip_conntrack->_ip_ct_sysctl_header)
+#define ve_ip_ct_net_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_net_table)
+#define ve_ip_ct_ipv4_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_ipv4_table)
+#define ve_ip_ct_netfilter_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_netfilter_table)
+#define ve_ip_ct_sysctl_table \
+	(get_exec_env()->_ip_conntrack->_ip_ct_sysctl_table)
+#else
+#define ve_ip_conntrack_max		ip_conntrack_max
+static struct ctl_table_header *ip_ct_sysctl_header;
+#define ve_ip_ct_sysctl_header		ip_ct_sysctl_header
+#define ve_ip_ct_net_table		ip_ct_net_table
+#define ve_ip_ct_ipv4_table		ip_ct_ipv4_table
+#define ve_ip_ct_netfilter_table	ip_ct_netfilter_table
+#define ve_ip_ct_sysctl_table		ip_ct_sysctl_table
+#endif
 extern unsigned int ip_conntrack_htable_size;
 
 /* From ip_conntrack_proto_tcp.c */
@@ -571,8 +616,6 @@ extern unsigned int ip_ct_generic_timeou
 static int log_invalid_proto_min = 0;
 static int log_invalid_proto_max = 255;
 
-static struct ctl_table_header *ip_ct_sysctl_header;
-
 static ctl_table ip_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_NF_CONNTRACK_MAX,
@@ -781,6 +824,112 @@ static ctl_table ip_ct_net_table[] = {
 };
 
 EXPORT_SYMBOL(ip_ct_log_invalid);
+
+#ifdef CONFIG_VE_IPTABLES
+static void ip_conntrack_sysctl_cleanup(void)
+{
+	if (!ve_is_super(get_exec_env())) {	/* kmalloc'ed copies only */
+		kfree(ve_ip_ct_net_table);
+		kfree(ve_ip_ct_ipv4_table);
+		kfree(ve_ip_ct_netfilter_table);
+		kfree(ve_ip_ct_sysctl_table);
+	}
+	ve_ip_ct_net_table = NULL;	/* cleared for the super VE as well */
+	ve_ip_ct_ipv4_table = NULL;
+	ve_ip_ct_netfilter_table = NULL;
+	ve_ip_ct_sysctl_table = NULL;
+}
+
+#define ALLOC_ENVCTL(field, k, label) do { /* kmalloc k entries or bail */ \
+		if (!((field) = kmalloc((k) * sizeof(ctl_table), GFP_KERNEL))) \
+			goto label; } while (0)
+static int ip_conntrack_sysctl_init(void)
+{
+	int i, ret = 0;
+	/* Build the current VE's sysctl tables with .data on ve_* values. */
+	ret = -ENOMEM;	/* returned if a table allocation fails */
+	if (ve_is_super(get_exec_env())) {
+		ve_ip_ct_net_table = ip_ct_net_table;	/* statics, no copy */
+		ve_ip_ct_ipv4_table = ip_ct_ipv4_table;
+		ve_ip_ct_netfilter_table = ip_ct_netfilter_table;
+		ve_ip_ct_sysctl_table = ip_ct_sysctl_table;
+	} else {
+		/* non-super VE: private kmalloc'ed copy of each table level */
+		ALLOC_ENVCTL(ve_ip_ct_net_table, 2, out);
+		ALLOC_ENVCTL(ve_ip_ct_ipv4_table, 2, nomem_1);
+		ALLOC_ENVCTL(ve_ip_ct_netfilter_table, 3, nomem_2);
+		ALLOC_ENVCTL(ve_ip_ct_sysctl_table, 21, nomem_3);
+
+		memcpy(ve_ip_ct_net_table, ip_ct_net_table,
+				2*sizeof(ctl_table));
+		memcpy(ve_ip_ct_ipv4_table, ip_ct_ipv4_table,
+				2*sizeof(ctl_table));
+		memcpy(ve_ip_ct_netfilter_table, ip_ct_netfilter_table,
+				3*sizeof(ctl_table));
+		memcpy(ve_ip_ct_sysctl_table, ip_ct_sysctl_table,
+				21*sizeof(ctl_table));
+		/* make each copied level's .child point at the next copy */
+		ve_ip_ct_net_table[0].child = ve_ip_ct_ipv4_table;
+		ve_ip_ct_ipv4_table[0].child = ve_ip_ct_netfilter_table;
+		ve_ip_ct_netfilter_table[0].child = ve_ip_ct_sysctl_table;
+	}
+	ve_ip_ct_sysctl_table[0].data = &ve_ip_conntrack_max;
+	ve_ip_ct_netfilter_table[1].data = &ve_ip_conntrack_max;
+	ve_ip_ct_sysctl_table[1].data = &ve_ip_conntrack_count;
+	/* ve_ip_ct_sysctl_table[2].data stays: read-only, common to all VEs.
+	 * Each ve_* value below is seeded from its global, then exposed. */
+	ve_ip_ct_tcp_timeouts[1] = ip_ct_tcp_timeout_syn_sent;
+	ve_ip_ct_sysctl_table[3].data = &ve_ip_ct_tcp_timeouts[1];
+	ve_ip_ct_tcp_timeouts[2] = ip_ct_tcp_timeout_syn_recv;
+	ve_ip_ct_sysctl_table[4].data = &ve_ip_ct_tcp_timeouts[2];
+	ve_ip_ct_tcp_timeouts[3] = ip_ct_tcp_timeout_established;
+	ve_ip_ct_sysctl_table[5].data = &ve_ip_ct_tcp_timeouts[3];
+	ve_ip_ct_tcp_timeouts[4] = ip_ct_tcp_timeout_fin_wait;
+	ve_ip_ct_sysctl_table[6].data = &ve_ip_ct_tcp_timeouts[4];
+	ve_ip_ct_tcp_timeouts[5] = ip_ct_tcp_timeout_close_wait;
+	ve_ip_ct_sysctl_table[7].data = &ve_ip_ct_tcp_timeouts[5];
+	ve_ip_ct_tcp_timeouts[6] = ip_ct_tcp_timeout_last_ack;
+	ve_ip_ct_sysctl_table[8].data = &ve_ip_ct_tcp_timeouts[6];
+	ve_ip_ct_tcp_timeouts[7] = ip_ct_tcp_timeout_time_wait;
+	ve_ip_ct_sysctl_table[9].data = &ve_ip_ct_tcp_timeouts[7];
+	ve_ip_ct_tcp_timeouts[8] = ip_ct_tcp_timeout_close;
+	ve_ip_ct_sysctl_table[10].data = &ve_ip_ct_tcp_timeouts[8];
+	ve_ip_ct_udp_timeout = ip_ct_udp_timeout;
+	ve_ip_ct_sysctl_table[11].data = &ve_ip_ct_udp_timeout;
+	ve_ip_ct_udp_timeout_stream = ip_ct_udp_timeout_stream;
+	ve_ip_ct_sysctl_table[12].data = &ve_ip_ct_udp_timeout_stream;
+	ve_ip_ct_icmp_timeout = ip_ct_icmp_timeout;
+	ve_ip_ct_sysctl_table[13].data = &ve_ip_ct_icmp_timeout;
+	ve_ip_ct_generic_timeout = ip_ct_generic_timeout;
+	ve_ip_ct_sysctl_table[14].data = &ve_ip_ct_generic_timeout;
+	ve_ip_ct_log_invalid = ip_ct_log_invalid;
+	ve_ip_ct_sysctl_table[15].data = &ve_ip_ct_log_invalid;
+	ve_ip_ct_tcp_timeout_max_retrans = ip_ct_tcp_timeout_max_retrans;
+	ve_ip_ct_sysctl_table[16].data = &ve_ip_ct_tcp_timeout_max_retrans;
+	ve_ip_ct_tcp_loose = ip_ct_tcp_loose;
+	ve_ip_ct_sysctl_table[17].data = &ve_ip_ct_tcp_loose;
+	ve_ip_ct_tcp_be_liberal = ip_ct_tcp_be_liberal;
+	ve_ip_ct_sysctl_table[18].data = &ve_ip_ct_tcp_be_liberal;
+	ve_ip_ct_tcp_max_retrans = ip_ct_tcp_max_retrans;
+	ve_ip_ct_sysctl_table[19].data = &ve_ip_ct_tcp_max_retrans;
+	for (i = 0; i < 20; i++)	/* [0]..[19]; [20] is presumably the sentinel */
+		ve_ip_ct_sysctl_table[i].owner_env = get_exec_env();
+	ve_ip_ct_netfilter_table[1].owner_env = get_exec_env();
+	return 0;
+
+nomem_3:	/* unwind in reverse allocation order */
+	kfree(ve_ip_ct_netfilter_table);
+	ve_ip_ct_netfilter_table = NULL;
+nomem_2:
+	kfree(ve_ip_ct_ipv4_table);
+	ve_ip_ct_ipv4_table = NULL;
+nomem_1:
+	kfree(ve_ip_ct_net_table);
+	ve_ip_ct_net_table = NULL;
+out:
+	return ret;
+}
+#endif /* CONFIG_VE_IPTABLES */
 #endif /* CONFIG_SYSCTL */
 
 static int init_or_cleanup(int init)
@@ -792,9 +941,16 @@ static int init_or_cleanup(int init)
 
 	if (!init) goto cleanup;
 
+	ret = -ENOENT;
+	if (!ve_is_super(get_exec_env()))
+		__module_get(THIS_MODULE);
+
 	ret = ip_conntrack_init();
 	if (ret < 0)
-		goto cleanup_nothing;
+		goto cleanup_unget;
+
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		return 0;
 
 #ifdef CONFIG_PROC_FS
 	ret = -ENOMEM;
@@ -804,13 +960,16 @@ static int init_or_cleanup(int init)
 	proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
 					&exp_file_ops);
 	if (!proc_exp) goto cleanup_proc;
+	proc_exp->proc_fops = &exp_file_ops;
 
-	proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
-	if (!proc_stat)
-		goto cleanup_proc_exp;
+	if (ve_is_super(get_exec_env())) {
+		proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
+		if (!proc_stat)
+			goto cleanup_proc_exp;
 
-	proc_stat->proc_fops = &ct_cpu_seq_fops;
-	proc_stat->owner = THIS_MODULE;
+		proc_stat->proc_fops = &ct_cpu_seq_fops;
+		proc_stat->owner = THIS_MODULE;
+	}
 #endif
 
 	ret = nf_register_hook(&ip_conntrack_defrag_ops);
@@ -854,21 +1013,32 @@ static int init_or_cleanup(int init)
 		goto cleanup_inoutandlocalops;
 	}
 #ifdef CONFIG_SYSCTL
-	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
-	if (ip_ct_sysctl_header == NULL) {
+#ifdef CONFIG_VE_IPTABLES
+	ret = ip_conntrack_sysctl_init();
+	if (ret < 0)
+		goto cleanup_sysctl;
+#endif
+	ret = -ENOMEM;
+	ve_ip_ct_sysctl_header = register_sysctl_table(ve_ip_ct_net_table, 0);
+	if (ve_ip_ct_sysctl_header == NULL) {
 		printk("ip_conntrack: can't register to sysctl.\n");
-		ret = -ENOMEM;
-		goto cleanup_localinops;
+		goto cleanup_sysctl2;
 	}
 #endif
 
-	return ret;
+	return 0;
 
  cleanup:
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		goto cleanup_init;
 	synchronize_net();
 #ifdef CONFIG_SYSCTL
- 	unregister_sysctl_table(ip_ct_sysctl_header);
- cleanup_localinops:
+ 	unregister_sysctl_table(ve_ip_ct_sysctl_header);
+ cleanup_sysctl2:
+#ifdef CONFIG_VE_IPTABLES
+	ip_conntrack_sysctl_cleanup();
+ cleanup_sysctl:
+#endif
 #endif
 	nf_unregister_hook(&ip_conntrack_local_in_ops);
  cleanup_inoutandlocalops:
@@ -887,15 +1057,18 @@ static int init_or_cleanup(int init)
 	nf_unregister_hook(&ip_conntrack_defrag_ops);
  cleanup_proc_stat:
 #ifdef CONFIG_PROC_FS
-	remove_proc_entry("ip_conntrack", proc_net_stat);
+	if (ve_is_super(get_exec_env()))
+		remove_proc_entry("ip_conntrack", proc_net_stat);
  cleanup_proc_exp:
 	proc_net_remove("ip_conntrack_expect");
  cleanup_proc:
 	proc_net_remove("ip_conntrack");
- cleanup_init:
 #endif /* CONFIG_PROC_FS */
+ cleanup_init:
 	ip_conntrack_cleanup();
- cleanup_nothing:
+ cleanup_unget:
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
 	return ret;
 }
 
@@ -906,11 +1079,11 @@ int ip_conntrack_protocol_register(struc
 	int ret = 0;
 
 	write_lock_bh(&ip_conntrack_lock);
-	if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
+	if (ve_ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
 		ret = -EBUSY;
 		goto out;
 	}
-	ip_ct_protos[proto->proto] = proto;
+	ve_ip_ct_protos[proto->proto] = proto;
  out:
 	write_unlock_bh(&ip_conntrack_lock);
 	return ret;
@@ -919,7 +1092,7 @@ int ip_conntrack_protocol_register(struc
 void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
 {
 	write_lock_bh(&ip_conntrack_lock);
-	ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
+	ve_ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
 	write_unlock_bh(&ip_conntrack_lock);
 	
 	/* Somebody could be still looking at the proto in bh. */
@@ -929,17 +1102,39 @@ void ip_conntrack_protocol_unregister(st
 	ip_ct_iterate_cleanup(kill_proto, &proto->proto);
 }
 
-static int __init init(void)
+int init_iptable_conntrack(void)
 {
 	return init_or_cleanup(1);
 }
 
-static void __exit fini(void)
+void fini_iptable_conntrack(void)
 {
 	init_or_cleanup(0);
 }
 
-module_init(init);
+/* Module entry: run conntrack init, then KSYMRESOLVE its init/fini hooks. */
+static int __init init(void)
+{
+	int rc = init_iptable_conntrack();
+
+	if (rc >= 0) {
+		KSYMRESOLVE(init_iptable_conntrack);
+		KSYMRESOLVE(fini_iptable_conntrack);
+		KSYMMODRESOLVE(ip_conntrack);
+		rc = 0;
+	}
+	return rc;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip_conntrack);	/* last resolved, first dropped */
+	KSYMUNRESOLVE(init_iptable_conntrack);
+	KSYMUNRESOLVE(fini_iptable_conntrack);
+	fini_iptable_conntrack();
+}
+
+subsys_initcall(init);
 module_exit(fini);
 
 /* Some modules need us, but don't depend directly on any symbol.
@@ -956,15 +1151,20 @@ EXPORT_SYMBOL_GPL(ip_conntrack_unregiste
 EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
 EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
 #endif
+EXPORT_SYMBOL(ip_conntrack_disable_ve0);
 EXPORT_SYMBOL(ip_conntrack_protocol_register);
 EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
 EXPORT_SYMBOL(ip_ct_get_tuple);
 EXPORT_SYMBOL(invert_tuplepr);
 EXPORT_SYMBOL(ip_conntrack_alter_reply);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL(ip_conntrack_destroyed);
+#endif
 EXPORT_SYMBOL(need_conntrack);
 EXPORT_SYMBOL(ip_conntrack_helper_register);
 EXPORT_SYMBOL(ip_conntrack_helper_unregister);
+EXPORT_SYMBOL(virt_ip_conntrack_helper_register);
+EXPORT_SYMBOL(virt_ip_conntrack_helper_unregister);
 EXPORT_SYMBOL(ip_ct_iterate_cleanup);
 EXPORT_SYMBOL(__ip_ct_refresh_acct);
 
@@ -974,14 +1174,18 @@ EXPORT_SYMBOL_GPL(__ip_conntrack_expect_
 EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
 EXPORT_SYMBOL(ip_conntrack_expect_related);
 EXPORT_SYMBOL(ip_conntrack_unexpect_related);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
+#endif
 EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
 
 EXPORT_SYMBOL(ip_conntrack_tuple_taken);
 EXPORT_SYMBOL(ip_ct_gather_frags);
 EXPORT_SYMBOL(ip_conntrack_htable_size);
 EXPORT_SYMBOL(ip_conntrack_lock);
+#ifndef CONFIG_VE_IPTABLES
 EXPORT_SYMBOL(ip_conntrack_hash);
+#endif
 EXPORT_SYMBOL(ip_conntrack_untracked);
 EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
 #ifdef CONFIG_IP_NF_NAT_NEEDED
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_nat_core.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_nat_core.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_nat_core.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_nat_core.c	2007-08-28 17:35:33.000000000 +0400
@@ -21,6 +21,8 @@
 #include <linux/icmp.h>
 #include <linux/udp.h>
 #include <linux/jhash.h>
+#include <linux/nfcalls.h>
+#include <ub/ub_mem.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
@@ -46,15 +48,24 @@ DEFINE_RWLOCK(ip_nat_lock);
 /* Calculated at init based on memory size */
 static unsigned int ip_nat_htable_size;
 
-static struct list_head *bysource;
-
 #define MAX_IP_NAT_PROTO 256
+
+#ifdef CONFIG_VE_IPTABLES
+#define ve_ip_nat_bysource	\
+	(get_exec_env()->_ip_conntrack->_ip_nat_bysource)
+#define ve_ip_nat_protos	\
+	(get_exec_env()->_ip_conntrack->_ip_nat_protos)
+#else
+static struct list_head *bysource;
+#define ve_ip_nat_bysource	bysource
 static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+#define ve_ip_nat_protos	ip_nat_protos
+#endif
 
 static inline struct ip_nat_protocol *
 __ip_nat_proto_find(u_int8_t protonum)
 {
-	return ip_nat_protos[protonum];
+	return ve_ip_nat_protos[protonum];
 }
 
 struct ip_nat_protocol *
@@ -177,7 +188,7 @@ find_appropriate_src(const struct ip_con
 	struct ip_conntrack *ct;
 
 	read_lock_bh(&ip_nat_lock);
-	list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
+	list_for_each_entry(ct, &ve_ip_nat_bysource[h], nat.info.bysource) {
 		if (same_src(ct, tuple)) {
 			/* Copy source part from reply tuple. */
 			invert_tuplepr(result,
@@ -291,13 +302,22 @@ get_unique_tuple(struct ip_conntrack_tup
 	ip_nat_proto_put(proto);
 }
 
+void ip_nat_hash_conntrack(struct ip_conntrack *conntrack)
+{
+	unsigned int srchash	/* bucket of the ORIGINAL-direction tuple */
+		= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	write_lock_bh(&ip_nat_lock);	/* guards the bysource chains */
+	list_add(&conntrack->nat.info.bysource, &ve_ip_nat_bysource[srchash]);
+	write_unlock_bh(&ip_nat_lock);
+}
+EXPORT_SYMBOL_GPL(ip_nat_hash_conntrack);
+
 unsigned int
 ip_nat_setup_info(struct ip_conntrack *conntrack,
 		  const struct ip_nat_range *range,
 		  unsigned int hooknum)
 {
 	struct ip_conntrack_tuple curr_tuple, new_tuple;
-	struct ip_nat_info *info = &conntrack->nat.info;
 	int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
 	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
 
@@ -332,14 +352,8 @@ ip_nat_setup_info(struct ip_conntrack *c
 	}
 
 	/* Place in source hash if this is the first time. */
-	if (have_to_hash) {
-		unsigned int srchash
-			= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-				      .tuple);
-		write_lock_bh(&ip_nat_lock);
-		list_add(&info->bysource, &bysource[srchash]);
-		write_unlock_bh(&ip_nat_lock);
-	}
+	if (have_to_hash)
+		ip_nat_hash_conntrack(conntrack);
 
 	/* It's done. */
 	if (maniptype == IP_NAT_MANIP_DST)
@@ -521,11 +535,11 @@ int ip_nat_protocol_register(struct ip_n
 	int ret = 0;
 
 	write_lock_bh(&ip_nat_lock);
-	if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
+	if (ve_ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
 		ret = -EBUSY;
 		goto out;
 	}
-	ip_nat_protos[proto->protonum] = proto;
+	ve_ip_nat_protos[proto->protonum] = proto;
  out:
 	write_unlock_bh(&ip_nat_lock);
 	return ret;
@@ -536,7 +550,7 @@ EXPORT_SYMBOL(ip_nat_protocol_register);
 void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
 {
 	write_lock_bh(&ip_nat_lock);
-	ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
+	ve_ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
 	write_unlock_bh(&ip_nat_lock);
 
 	/* Someone could be still looking at the proto in a bh. */
@@ -589,38 +603,55 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_
 EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
 #endif
 
-static int __init ip_nat_init(void)
+static int ip_nat_init(void)
 {
 	size_t i;
+	int ret;
 
-	/* Leave them the same for the moment. */
-	ip_nat_htable_size = ip_conntrack_htable_size;
+	if (ve_is_super(get_exec_env()))
+		ip_nat_htable_size = ip_conntrack_htable_size;
 
 	/* One vmalloc for both hash tables */
-	bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
-	if (!bysource)
-		return -ENOMEM;
+	ret = -ENOMEM;
+	ve_ip_nat_bysource =
+		ub_vmalloc(sizeof(struct list_head)*ip_nat_htable_size*2);
+	if (!ve_ip_nat_bysource)
+		goto nomem;
+
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_protos =
+		ub_kmalloc(sizeof(void *)*MAX_IP_NAT_PROTO, GFP_KERNEL);
+	if (!ve_ip_nat_protos)
+		goto nomem2;
+#endif
 
 	/* Sew in builtin protocols. */
 	write_lock_bh(&ip_nat_lock);
 	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
-		ip_nat_protos[i] = &ip_nat_unknown_protocol;
-	ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
-	ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
-	ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
+		ve_ip_nat_protos[i] = &ip_nat_unknown_protocol;
+	ve_ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
+	ve_ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
+	ve_ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
 	write_unlock_bh(&ip_nat_lock);
 
 	for (i = 0; i < ip_nat_htable_size; i++) {
-		INIT_LIST_HEAD(&bysource[i]);
+		INIT_LIST_HEAD(&ve_ip_nat_bysource[i]);
 	}
 
 	/* FIXME: Man, this is a hack.  <SIGH> */
-	IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
-	ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
+	IP_NF_ASSERT(ve_ip_conntrack_destroyed == NULL);
+	ve_ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
 
-	/* Initialize fake conntrack so that NAT will skip it */
-	ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+	if (ve_is_super(get_exec_env()))
+		/* Initialize fake conntrack so that NAT will skip it */
+		ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
 	return 0;
+#ifdef CONFIG_VE_IPTABLES
+nomem2:
+#endif
+	vfree(ve_ip_nat_bysource);
+nomem:
+	return ret;
 }
 
 /* Clear NAT section of all conntracks, in case we're loaded again. */
@@ -631,14 +662,41 @@ static int clean_nat(struct ip_conntrack
 	return 0;
 }
 
-static void __exit ip_nat_cleanup(void)
+static void ip_nat_cleanup(void)
 {
 	ip_ct_iterate_cleanup(&clean_nat, NULL);
-	ip_conntrack_destroyed = NULL;
-	vfree(bysource);
+	ve_ip_conntrack_destroyed = NULL;
+	vfree(ve_ip_nat_bysource);
+	ve_ip_nat_bysource = NULL;
+#ifdef CONFIG_VE_IPTABLES
+	kfree(ve_ip_nat_protos);
+	ve_ip_nat_protos = NULL;
+#endif
+}
+
+static int __init init(void)
+{
+	int err;
+
+	err = ip_nat_init();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(ip_nat_init);
+	KSYMRESOLVE(ip_nat_cleanup);
+	KSYMMODRESOLVE(ip_nat);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip_nat);
+	KSYMUNRESOLVE(ip_nat_cleanup);
+	KSYMUNRESOLVE(ip_nat_init);
+	ip_nat_cleanup();
 }
 
 MODULE_LICENSE("GPL");
 
-module_init(ip_nat_init);
-module_exit(ip_nat_cleanup);
+fs_initcall(init);
+module_exit(fini);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_nat_ftp.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_nat_ftp.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_nat_ftp.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_nat_ftp.c	2007-08-28 17:35:32.000000000 +0400
@@ -19,6 +19,7 @@
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
@@ -154,18 +155,43 @@ static unsigned int ip_nat_ftp(struct sk
 	return NF_ACCEPT;
 }
 
-static void __exit fini(void)
+#ifdef CONFIG_VE_IPTABLES
+#undef ve_ip_nat_ftp_hook
+#define ve_ip_nat_ftp_hook \
+		(get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook)
+#endif
+int init_iptable_nat_ftp(void)
 {
-	ip_nat_ftp_hook = NULL;
+	BUG_ON(ve_ip_nat_ftp_hook);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_ftp_hook = (ip_nat_helper_func)ip_nat_ftp;
+#else
+	ve_ip_nat_ftp_hook = ip_nat_ftp;
+#endif
+	return 0;
+}
+
+void fini_iptable_nat_ftp(void)
+{
+	ve_ip_nat_ftp_hook = NULL;
 	/* Make sure noone calls it, meanwhile. */
 	synchronize_net();
 }
 
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip_nat_ftp);
+	KSYMUNRESOLVE(init_iptable_nat_ftp);
+	KSYMUNRESOLVE(fini_iptable_nat_ftp);
+	fini_iptable_nat_ftp();
+}
+
 static int __init init(void)
 {
-	BUG_ON(ip_nat_ftp_hook);
-	ip_nat_ftp_hook = ip_nat_ftp;
-	return 0;
+	KSYMRESOLVE(init_iptable_nat_ftp);
+	KSYMRESOLVE(fini_iptable_nat_ftp);
+	KSYMMODRESOLVE(ip_nat_ftp);
+	return init_iptable_nat_ftp();
 }
 
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_nat_irc.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_nat_irc.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_nat_irc.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_nat_irc.c	2007-08-28 17:35:32.000000000 +0400
@@ -23,6 +23,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/moduleparam.h>
+#include <linux/nfcalls.h>
 
 #if 0
 #define DEBUGP printk
@@ -96,18 +97,44 @@ static unsigned int help(struct sk_buff 
 	return ret;
 }
 
-static void __exit fini(void)
+#ifdef CONFIG_VE_IPTABLES
+#undef ve_ip_nat_irc_hook
+#define ve_ip_nat_irc_hook \
+		(get_exec_env()->_ip_conntrack->_ip_nat_irc_hook)
+#endif
+
+int init_iptable_nat_irc(void)
+{
+	BUG_ON(ve_ip_nat_irc_hook);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_irc_hook = (ip_nat_helper_func)help;
+#else
+	ve_ip_nat_irc_hook = help;
+#endif
+	return 0;
+}
+
+void fini_iptable_nat_irc(void)
 {
-	ip_nat_irc_hook = NULL;
+	ve_ip_nat_irc_hook = NULL;
 	/* Make sure noone calls it, meanwhile. */
 	synchronize_net();
 }
 
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip_nat_irc);
+	KSYMUNRESOLVE(init_iptable_nat_irc);
+	KSYMUNRESOLVE(fini_iptable_nat_irc);
+	fini_iptable_nat_irc();
+}
+
 static int __init init(void)
 {
-	BUG_ON(ip_nat_irc_hook);
-	ip_nat_irc_hook = help;
-	return 0;
+	KSYMRESOLVE(init_iptable_nat_irc);
+	KSYMRESOLVE(fini_iptable_nat_irc);
+	KSYMMODRESOLVE(ip_nat_irc);
+	return init_iptable_nat_irc();
 }
 
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_nat_rule.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_nat_rule.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_nat_rule.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_nat_rule.c	2007-08-28 17:35:32.000000000 +0400
@@ -34,6 +34,13 @@
 #define DEBUGP(format, args...)
 #endif
 
+#ifdef CONFIG_VE_IPTABLES	/* nat table pointer kept in the current VE */
+#define ve_ip_nat_table		\
+	(get_exec_env()->_ip_conntrack->_ip_nat_table)
+#else				/* no VE support: the one static table */
+#define ve_ip_nat_table		(&nat_table)
+#endif
+
 #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
 
 static struct
@@ -41,7 +48,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[3];
 	struct ipt_error term;
-} nat_initial_table __initdata
+} nat_initial_table
 = { { "nat", NAT_VALID_HOOKS, 4,
       sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
       { [NF_IP_PRE_ROUTING] = 0,
@@ -235,6 +242,93 @@ static int ipt_dnat_checkentry(const cha
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_target *pt;
+	struct ip_nat_multi_range_compat *pinfo;
+	struct compat_ip_nat_multi_range info;
+	u_int16_t tsize;
+
+	pt = (struct ipt_entry_target *)target;
+	tsize = pt->u.user.target_size;
+	if (__copy_to_user(*dstptr, pt, sizeof(struct ipt_entry_target)))
+		return -EFAULT;
+	pinfo = (struct ip_nat_multi_range_compat *)pt->data;
+	memset(&info, 0, sizeof(struct compat_ip_nat_multi_range));
+	info.rangesize = pinfo->rangesize;
+	info.range[0].flags = pinfo->range[0].flags;
+	info.range[0].min_ip = pinfo->range[0].min_ip;
+	info.range[0].max_ip = pinfo->range[0].max_ip;
+	info.range[0].min = pinfo->range[0].min;
+	info.range[0].max = pinfo->range[0].max;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_target),
+				&info, sizeof(struct compat_ip_nat_multi_range)))
+		return -EFAULT;
+	tsize -= off;
+	if (put_user(tsize, (u_int16_t *)*dstptr))
+		return -EFAULT;
+	*size -= off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int compat_from_user(void *target, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_target *pt;
+	struct ipt_entry_target *dstpt;
+	struct compat_ip_nat_multi_range *pinfo;
+	struct ip_nat_multi_range_compat info;
+	u_int16_t tsize;
+
+	pt = (struct compat_ipt_entry_target *)target;
+	dstpt = (struct ipt_entry_target *)*dstptr;
+	tsize = pt->u.user.target_size;
+	memcpy(*dstptr, pt, sizeof(struct compat_ipt_entry_target));
+	pinfo = (struct compat_ip_nat_multi_range *)pt->data;
+	memset(&info, 0, sizeof(struct ip_nat_multi_range_compat));
+	info.rangesize = pinfo->rangesize;
+	info.range[0].flags = pinfo->range[0].flags;
+	info.range[0].min_ip = pinfo->range[0].min_ip;
+	info.range[0].max_ip = pinfo->range[0].max_ip;
+	info.range[0].min = pinfo->range[0].min;
+	info.range[0].max = pinfo->range[0].max;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_target),
+			&info, sizeof(struct ip_nat_multi_range_compat));
+	tsize += off;
+	dstpt->u.user.target_size = tsize;
+	*size += off;
+	*dstptr += tsize;
+	return 0;
+}
+
+static int compat(void *target, void **dstptr, int *size, int convert)
+{
+	int ret, off;
+
+	off = IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat)) -
+		COMPAT_IPT_ALIGN(sizeof(struct compat_ip_nat_multi_range));
+	switch (convert) {
+		case COMPAT_TO_USER:
+			ret = compat_to_user(target, dstptr, size, off);
+			break;
+		case COMPAT_FROM_USER:
+			ret = compat_from_user(target, dstptr, size, off);
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			ret = 0;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+#endif
+
 inline unsigned int
 alloc_null_binding(struct ip_conntrack *conntrack,
 		   struct ip_nat_info *info,
@@ -286,7 +380,7 @@ int ip_nat_rule_find(struct sk_buff **ps
 {
 	int ret;
 
-	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+	ret = ipt_do_table(pskb, hooknum, in, out, ve_ip_nat_table, NULL);
 
 	if (ret == NF_ACCEPT) {
 		if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
@@ -300,42 +394,63 @@ static struct ipt_target ipt_snat_reg = 
 	.name		= "SNAT",
 	.target		= ipt_snat_target,
 	.checkentry	= ipt_snat_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 };
 
 static struct ipt_target ipt_dnat_reg = {
 	.name		= "DNAT",
 	.target		= ipt_dnat_target,
 	.checkentry	= ipt_dnat_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 };
 
-int __init ip_nat_rule_init(void)
+int ip_nat_rule_init(void)
 {
 	int ret;
+	struct ipt_table *tmp_table;
 
-	ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
-	if (ret != 0)
-		return ret;
-	ret = ipt_register_target(&ipt_snat_reg);
-	if (ret != 0)
-		goto unregister_table;
-
-	ret = ipt_register_target(&ipt_dnat_reg);
-	if (ret != 0)
-		goto unregister_snat;
+	tmp_table = ipt_register_table(&nat_table,
+			&nat_initial_table.repl);
+	if (IS_ERR(tmp_table))
+		return PTR_ERR(tmp_table);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_table = tmp_table;
+#endif
 
-	return ret;
+	if (ve_is_super(get_exec_env())) {
+		ret = ipt_register_target(&ipt_snat_reg);
+		if (ret != 0)
+			goto unregister_table;
+
+		ret = ipt_register_target(&ipt_dnat_reg);
+		if (ret != 0)
+			goto unregister_snat;
+	}
+	return 0;
 
  unregister_snat:
 	ipt_unregister_target(&ipt_snat_reg);
  unregister_table:
-	ipt_unregister_table(&nat_table);
+	ipt_unregister_table(ve_ip_nat_table);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_table = NULL;
+#endif
 
 	return ret;
 }
 
 void ip_nat_rule_cleanup(void)
 {
-	ipt_unregister_target(&ipt_dnat_reg);
-	ipt_unregister_target(&ipt_snat_reg);
-	ipt_unregister_table(&nat_table);
+	if (ve_is_super(get_exec_env())) {
+		ipt_unregister_target(&ipt_dnat_reg);
+		ipt_unregister_target(&ipt_snat_reg);
+	}
+	ipt_unregister_table(ve_ip_nat_table);
+#ifdef CONFIG_VE_IPTABLES
+	ve_ip_nat_table = NULL;
+#endif
 }
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_nat_standalone.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_nat_standalone.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_nat_standalone.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_nat_standalone.c	2007-08-28 17:35:32.000000000 +0400
@@ -30,6 +30,7 @@
 #include <net/ip.h>
 #include <net/checksum.h>
 #include <linux/spinlock.h>
+#include <linux/nfcalls.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
@@ -359,19 +360,19 @@ static int init_or_cleanup(int init)
 {
 	int ret = 0;
 
-	need_conntrack();
-
 	if (!init) goto cleanup;
 
-#ifdef CONFIG_XFRM
-	BUG_ON(ip_nat_decode_session != NULL);
-	ip_nat_decode_session = nat_decode_session;
-#endif
+	if (!ve_is_super(get_exec_env()))
+		__module_get(THIS_MODULE);
+
 	ret = ip_nat_rule_init();
 	if (ret < 0) {
 		printk("ip_nat_init: can't setup rules.\n");
-		goto cleanup_decode_session;
+ 		goto cleanup_modput;
 	}
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		return 0;
+
 	ret = nf_register_hook(&ip_nat_in_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register in hook.\n");
@@ -405,6 +406,8 @@ static int init_or_cleanup(int init)
 	return ret;
 
  cleanup:
+	if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
+		goto cleanup_rule_init;
 	nf_unregister_hook(&ip_nat_local_in_ops);
  cleanup_localoutops:
 	nf_unregister_hook(&ip_nat_local_out_ops);
@@ -418,25 +421,62 @@ static int init_or_cleanup(int init)
 	nf_unregister_hook(&ip_nat_in_ops);
  cleanup_rule_init:
 	ip_nat_rule_cleanup();
- cleanup_decode_session:
-#ifdef CONFIG_XFRM
-	ip_nat_decode_session = NULL;
-	synchronize_net();
-#endif
+ cleanup_modput:
+	if (!ve_is_super(get_exec_env()))
+		module_put(THIS_MODULE);
 	return ret;
 }
 
-static int __init init(void)
+int init_iptable_nat(void)
 {
 	return init_or_cleanup(1);
 }
 
-static void __exit fini(void)
+void fini_iptable_nat(void)
 {
 	init_or_cleanup(0);
 }
 
-module_init(init);
+static int __init init(void)
+{
+	int err;
+
+	need_conntrack();
+
+#ifdef CONFIG_XFRM
+	BUG_ON(ip_nat_decode_session != NULL);
+	ip_nat_decode_session = nat_decode_session;
+#endif
+
+	err = init_iptable_nat();
+	if (err < 0) {
+#ifdef CONFIG_XFRM
+		ip_nat_decode_session = NULL;
+		synchronize_net();
+#endif
+		return err;
+	}
+
+	KSYMRESOLVE(init_iptable_nat);
+	KSYMRESOLVE(fini_iptable_nat);
+	KSYMMODRESOLVE(iptable_nat);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(iptable_nat);
+	KSYMUNRESOLVE(init_iptable_nat);
+	KSYMUNRESOLVE(fini_iptable_nat);
+	/* init_or_cleanup(0) copes with ip_conntrack_disable_ve0 itself. */
+	fini_iptable_nat();
+#ifdef CONFIG_XFRM
+	ip_nat_decode_session = NULL;
+	synchronize_net();
+#endif
+}
+
+fs_initcall(init);
 module_exit(fini);
 
 MODULE_LICENSE("GPL");
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_queue.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_queue.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_queue.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_queue.c	2007-08-28 17:35:32.000000000 +0400
@@ -538,12 +538,15 @@ ipq_rcv_sk(struct sock *sk, int len)
 {
 	struct sk_buff *skb;
 	unsigned int qlen;
+	struct ve_struct *env;
 
 	down(&ipqnl_sem);
 			
 	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
 		skb = skb_dequeue(&sk->sk_receive_queue);
+		env = set_exec_env(skb->owner_env);
 		ipq_rcv_skb(skb);
+		(void)set_exec_env(env);
 		kfree_skb(skb);
 	}
 		
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_tables.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_tables.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ip_tables.c	2007-08-24 19:28:07.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ip_tables.c	2007-08-28 17:35:36.000000000 +0400
@@ -24,14 +24,17 @@
 #include <linux/module.h>
 #include <linux/icmp.h>
 #include <net/ip.h>
+#include <net/compat.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
 #include <linux/proc_fs.h>
 #include <linux/err.h>
 #include <linux/cpumask.h>
+#include <ub/ub_mem.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -387,8 +390,8 @@ mark_source_chains(struct xt_table_info 
 				= (void *)ipt_get_target(e);
 
 			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
-				printk("iptables: loop hook %u pos %u %08X.\n",
-				       hook, pos, e->comefrom);
+				ve_printk(VE_LOG, "iptables: loop hook %u pos "
+					"%u %08X.\n", hook, pos, e->comefrom);
 				return 0;
 			}
 			e->comefrom
@@ -402,6 +405,13 @@ mark_source_chains(struct xt_table_info 
 			    && unconditional(&e->ip)) {
 				unsigned int oldpos, size;
 
+				if (t->verdict < -NF_MAX_VERDICT - 1) {
+					duprintf("mark_source_chains: bad "
+						"negative verdict (%i)\n",
+							t->verdict);
+					return 0;
+				}
+
 				/* Return: backtrack through the last
 				   big jump. */
 				do {
@@ -439,6 +449,14 @@ mark_source_chains(struct xt_table_info 
 				if (strcmp(t->target.u.user.name,
 					   IPT_STANDARD_TARGET) == 0
 				    && newpos >= 0) {
+					if (newpos > newinfo->size -
+						sizeof(struct ipt_entry)) {
+						duprintf("mark_source_chains: "
+							"bad verdict (%i)\n",
+								newpos);
+						return 0;
+					}
+
 					/* This a jump; chase it. */
 					duprintf("Jump rule %u -> %u\n",
 						 pos, newpos);
@@ -471,72 +489,96 @@ cleanup_match(struct ipt_entry_match *m,
 	return 0;
 }
 
-static inline int
-standard_check(const struct ipt_entry_target *t,
-	       unsigned int max_offset)
+static inline int check_match(struct ipt_entry_match *m, const char *name,
+				const struct ipt_ip *ip, unsigned int hookmask)
 {
-	struct ipt_standard_target *targ = (void *)t;
-
-	/* Check standard info. */
-	if (t->u.target_size
-	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
-		duprintf("standard_check: target size %u != %u\n",
-			 t->u.target_size,
-			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
-		return 0;
-	}
-
-	if (targ->verdict >= 0
-	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
-		duprintf("ipt_standard_check: bad verdict (%i)\n",
-			 targ->verdict);
-		return 0;
-	}
+	int ret = 0;
 
-	if (targ->verdict < -NF_MAX_VERDICT - 1) {
-		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
-			 targ->verdict);
-		return 0;
+	if (m->u.kernel.match->checkentry
+	    && !m->u.kernel.match->checkentry(name, ip, m->data,
+					      m->u.match_size - sizeof(*m),
+					      hookmask)) {
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 m->u.kernel.match->name);
+		ret = -EINVAL;
 	}
-	return 1;
+	return ret;
 }
 
 static inline int
-check_match(struct ipt_entry_match *m,
+find_check_match(struct ipt_entry_match *m,
 	    const char *name,
 	    const struct ipt_ip *ip,
 	    unsigned int hookmask,
 	    unsigned int *i)
 {
+	int ret;
 	struct ipt_match *match;
 
 	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
 						   m->u.user.revision),
 					"ipt_%s", m->u.user.name);
 	if (IS_ERR(match) || !match) {
-		duprintf("check_match: `%s' not found\n", m->u.user.name);
+		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
 		return match ? PTR_ERR(match) : -ENOENT;
 	}
 	m->u.kernel.match = match;
 
-	if (m->u.kernel.match->checkentry
-	    && !m->u.kernel.match->checkentry(name, ip, m->data,
-					      m->u.match_size - sizeof(*m),
-					      hookmask)) {
-		module_put(m->u.kernel.match->me);
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 m->u.kernel.match->name);
-		return -EINVAL;
-	}
+	ret = check_match(m, name, ip, hookmask);
+	if (ret) {
+		/* Drop the module reference held since the lookup above. */
+		module_put(match->me);
+		return ret;
+	}
 
 	(*i)++;
 	return 0;
 }
 
-static struct ipt_target ipt_standard_target;
+static inline int check_target(struct ipt_entry *e, const char *name)
+{
+	struct ipt_entry_target *t;
+	struct ipt_target *target;
+	int ret;
+
+	t = ipt_get_target(e);
+	target = t->u.kernel.target;
+	ret = 0;
+	if (t->u.kernel.target->checkentry
+		   && !t->u.kernel.target->checkentry(name, e, t->data,
+						      t->u.target_size
+						      - sizeof(*t),
+						      e->comefrom)) {
+		duprintf("check_target: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
+		ret = -EINVAL;
+	}
+	return ret;
+
+}
+
+static inline int check_entry(struct ipt_entry *e, const char *name)
+{
+	struct ipt_entry_target *t;
+
+	if (!ip_checkentry(&e->ip)) {
+		duprintf("check_entry: ip check failed %p %s.\n", e, name);
+		return -EINVAL;
+	}
+
+	if (e->target_offset + sizeof(struct ipt_entry_target) >
+		e->next_offset)
+		return -EINVAL;
+
+	t = ipt_get_target(e);
+	if (e->target_offset + t->u.target_size > e->next_offset)
+		return -EINVAL;
+
+	return 0;
+}
 
 static inline int
-check_entry(struct ipt_entry *e, const char *name, unsigned int size,
+find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 	    unsigned int *i)
 {
 	struct ipt_entry_target *t;
@@ -544,13 +586,13 @@ check_entry(struct ipt_entry *e, const c
 	int ret;
 	unsigned int j;
 
-	if (!ip_checkentry(&e->ip)) {
-		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
-		return -EINVAL;
-	}
+	ret = check_entry(e, name);
+	if (ret != 0)
+		return ret;
 
 	j = 0;
-	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
+	ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, e->comefrom,
+									&j);
 	if (ret != 0)
 		goto cleanup_matches;
 
@@ -566,22 +608,12 @@ check_entry(struct ipt_entry *e, const c
 	}
 	t->u.kernel.target = target;
 
-	if (t->u.kernel.target == &ipt_standard_target) {
-		if (!standard_check(t, size)) {
-			ret = -EINVAL;
-			goto cleanup_matches;
-		}
-	} else if (t->u.kernel.target->checkentry
-		   && !t->u.kernel.target->checkentry(name, e, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
-						      e->comefrom)) {
-		module_put(t->u.kernel.target->me);
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 t->u.kernel.target->name);
-		ret = -EINVAL;
+	ret = check_target(e, name);
+	if (ret) {
+		/* Drop the target module reference before unwinding matches. */
+		module_put(t->u.kernel.target->me);
 		goto cleanup_matches;
 	}
 
 	(*i)++;
 	return 0;
@@ -717,8 +746,7 @@ translate_table(const char *name,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
-				check_entry, name, size, &i);
-
+				find_check_entry, name, size, &i);
 	if (ret != 0) {
 		IPT_ENTRY_ITERATE(entry0, newinfo->size,
 				  cleanup_entry, &i);
@@ -791,32 +819,45 @@ get_counters(const struct xt_table_info 
 	}
 }
 
-static int
-copy_entries_to_user(unsigned int total_size,
-		     struct ipt_table *table,
-		     void __user *userptr)
+static inline struct xt_counters * alloc_counters(struct ipt_table *table)
 {
-	unsigned int off, num, countersize;
-	struct ipt_entry *e;
+	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
-	int ret = 0;
-	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc_node(countersize, numa_node_id());
+	counters = ub_vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* First, sum counters... */
 	write_lock_bh(&table->lock);
 	get_counters(private, counters);
 	write_unlock_bh(&table->lock);
 
+	return counters;
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+		     struct ipt_table *table,
+		     void __user *userptr)
+{
+	unsigned int off, num;
+	struct ipt_entry *e;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
+	int ret = 0;
+	void *loc_cpu_entry;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
 	/* choose the copy that is on our node/cpu, ...
 	 * This choice is lazy (because current thread is
 	 * allowed to migrate to another cpu)
@@ -876,25 +917,386 @@ copy_entries_to_user(unsigned int total_
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+static DECLARE_MUTEX(compat_ipt_mutex);
+
+struct compat_delta {
+	struct compat_delta *next;
+	unsigned int offset;
+	short delta;
+};
+
+static struct compat_delta *compat_offsets = NULL;
+
+static int compat_add_offset(unsigned int offset, short delta)
+{
+	struct compat_delta *tmp;
+
+	tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+	tmp->offset = offset;
+	tmp->delta = delta;
+	if (compat_offsets) {
+		tmp->next = compat_offsets->next;
+		compat_offsets->next = tmp;
+	} else {
+		compat_offsets = tmp;
+		tmp->next = NULL;
+	}
+	return 0;
+}
+
+/* Free every recorded offset delta and leave the list empty. */
+static void compat_flush_offsets(void)
+{
+	struct compat_delta *node = compat_offsets;
+
+	while (node != NULL) {
+		struct compat_delta *following = node->next;
+		kfree(node);
+		node = following;
+	}
+	compat_offsets = NULL;
+}
+
+/* Sum the deltas of all recorded entries whose offset precedes @offset. */
+static short compat_calc_jump(unsigned int offset)
+{
+	const struct compat_delta *node = compat_offsets;
+	short total = 0;
+
+	for (; node != NULL; node = node->next)
+		total += (node->offset < offset) ? node->delta : 0;
+	return total;
+}
+
+struct compat_ipt_standard_target
+{
+	struct compat_ipt_entry_target target;
+	compat_int_t verdict;
+};
+
+struct compat_ipt_standard
+{
+	struct compat_ipt_entry entry;
+	struct compat_ipt_standard_target target;
+};
+
+#define IPT_ST_LEN		XT_ALIGN(sizeof(struct ipt_standard_target))
+#define IPT_ST_COMPAT_LEN	COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target))
+#define IPT_ST_OFFSET		(IPT_ST_LEN - IPT_ST_COMPAT_LEN)
+/* Translate a standard target between the native and compat layouts. */
+static int compat_ipt_standard_fn(void *target,
+		void **dstptr, int *size, int convert)
+{
+	struct compat_ipt_standard_target compat_st, *pcompat_st;
+	struct ipt_standard_target st, *pst;
+	int ret = 0;
+
+	switch (convert) {
+		case COMPAT_TO_USER:
+			pst = target;
+			memcpy(&compat_st.target, &pst->target,
+				sizeof(compat_st.target));
+			compat_st.verdict = pst->verdict;
+			if (compat_st.verdict > 0)
+				compat_st.verdict -=
+					compat_calc_jump(compat_st.verdict);
+			compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN;
+			if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN))
+				ret = -EFAULT;
+			*size -= IPT_ST_OFFSET;
+			*dstptr += IPT_ST_COMPAT_LEN;
+			break;
+		case COMPAT_FROM_USER:
+			pcompat_st = target;
+			/* Copy only the header; the verdict is converted below. */
+			memcpy(&st.target, &pcompat_st->target, sizeof(st.target));
+			st.verdict = pcompat_st->verdict;
+			if (st.verdict > 0)
+				st.verdict += compat_calc_jump(st.verdict);
+			st.target.u.user.target_size = IPT_ST_LEN;
+			memcpy(*dstptr, &st, IPT_ST_LEN);
+			*size += IPT_ST_OFFSET;
+			*dstptr += IPT_ST_LEN;
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += IPT_ST_OFFSET;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+
+int ipt_target_align_compat(void *target, void **dstptr,
+		int *size, int off, int convert)
+{
+	struct compat_ipt_entry_target *pcompat;
+	struct ipt_entry_target *pt;
+	u_int16_t tsize;
+	int ret;
+
+	ret = 0;
+	switch (convert) {
+		case COMPAT_TO_USER:
+			pt = (struct ipt_entry_target *)target;
+			tsize = pt->u.user.target_size;
+			if (__copy_to_user(*dstptr, pt, tsize)) {
+				ret = -EFAULT;
+				break;
+			}
+			tsize -= off;
+			if (put_user(tsize, (u_int16_t *)*dstptr))
+				ret = -EFAULT;
+			*size -= off;
+			*dstptr += tsize;
+			break;
+		case COMPAT_FROM_USER:
+			pcompat = (struct compat_ipt_entry_target *)target;
+			pt = (struct ipt_entry_target *)*dstptr;
+			tsize = pcompat->u.user.target_size;
+			memcpy(pt, pcompat, tsize);
+			tsize += off;
+			pt->u.user.target_size = tsize;
+			*size += off;
+			*dstptr += tsize;
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+
+int ipt_match_align_compat(void *match, void **dstptr,
+		int *size, int off, int convert)
+{
+	struct compat_ipt_entry_match *pcompat_m;
+	struct ipt_entry_match *pm;
+	u_int16_t msize;
+	int ret;
+
+	ret = 0;
+	switch (convert) {
+		case COMPAT_TO_USER:
+			pm = (struct ipt_entry_match *)match;
+			msize = pm->u.user.match_size;
+			if (__copy_to_user(*dstptr, pm, msize)) {
+				ret = -EFAULT;
+				break;
+			}
+			msize -= off;
+			if (put_user(msize, (u_int16_t *)*dstptr))
+				ret = -EFAULT;
+			*size -= off;
+			*dstptr += msize;
+			break;
+		case COMPAT_FROM_USER:
+			pcompat_m = (struct compat_ipt_entry_match *)match;
+			pm = (struct ipt_entry_match *)*dstptr;
+			msize = pcompat_m->u.user.match_size;
+			memcpy(pm, pcompat_m, msize);
+			msize += off;
+			pm->u.user.match_size = msize;
+			*size += off;
+			*dstptr += msize;
+			break;
+		case COMPAT_CALC_SIZE:
+			*size += off;
+			break;
+		default:
+			ret = -ENOPROTOOPT;
+			break;
+	}
+	return ret;
+}
+
+static int icmp_compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;
+
+	off = IPT_ALIGN(sizeof(struct ipt_icmp)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_icmp));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+
+static inline int
+compat_calc_match(struct ipt_entry_match *m, int * size)
+{
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
+	return 0;
+}
+
+static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
+		void *base, struct xt_table_info *newinfo)
+{
+	struct ipt_entry_target *t;
+	unsigned int entry_offset;
+	int off, i, ret;
+
+	off = 0;
+	entry_offset = (void *)e - base;
+	IPT_MATCH_ITERATE(e, compat_calc_match, &off);
+	t = ipt_get_target(e);
+	if (t->u.kernel.target->compat)
+		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
+	newinfo->size -= off;
+	ret = compat_add_offset(entry_offset, off);
+	if (ret)
+		return ret;
+
+	for (i = 0; i< NF_IP_NUMHOOKS; i++) {
+		if (info->hook_entry[i] && (e < (struct ipt_entry *)
+				(base + info->hook_entry[i])))
+			newinfo->hook_entry[i] -= off;
+		if (info->underflow[i] && (e < (struct ipt_entry *)
+				(base + info->underflow[i])))
+			newinfo->underflow[i] -= off;
+	}
+	return 0;
+}
+
+static int compat_table_info(struct xt_table_info *info,
+		struct xt_table_info *newinfo)
+{
+	void *loc_cpu_entry;
+	int i;
+
+	if (!newinfo || !info)
+		return -EINVAL;
+
+	memset(newinfo, 0, sizeof(struct xt_table_info));
+	newinfo->size = info->size;
+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+		newinfo->hook_entry[i] = info->hook_entry[i];
+		newinfo->underflow[i] = info->underflow[i];
+	}
+	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
+			compat_calc_entry, info, loc_cpu_entry, newinfo);
+}
+#endif
+
+/* Copy a table's ipt_getinfo (compat layout for 32-bit callers) to user. */
+static int get_info(void __user *user, int *len)
+{
+	char name[IPT_TABLE_MAXNAMELEN];
+	struct ipt_table *t;
+	int ret, size;
+
+#ifdef CONFIG_COMPAT
+	if (is_current_32bits())
+		size = sizeof(struct compat_ipt_getinfo);
+	else
+#endif
+		size = sizeof(struct ipt_getinfo);
+
+	if (*len != size) {
+		duprintf("length %u != %u\n", *len, (unsigned int)size);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(name, user, sizeof(name)) != 0)
+		return -EFAULT;
+
+	name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+	down(&compat_ipt_mutex);
+#endif
+	t = try_then_request_module(xt_find_table_lock(AF_INET, name),
+			"iptable_%s", name);
+	if (t && !IS_ERR(t)) {
+		struct ipt_getinfo info;
+		struct xt_table_info *private = t->private;
+#ifdef CONFIG_COMPAT
+		struct compat_ipt_getinfo compat_info;
+#endif
+		void *pinfo;
+
+		/* strncpy() below zero-fills name[] so no stack bytes leak. */
+#ifdef CONFIG_COMPAT
+		if (is_current_32bits()) {
+			struct xt_table_info tmp;
+			ret = compat_table_info(private, &tmp);
+			compat_flush_offsets();
+			memcpy(compat_info.hook_entry, tmp.hook_entry,
+					sizeof(compat_info.hook_entry));
+			memcpy(compat_info.underflow, tmp.underflow,
+					sizeof(compat_info.underflow));
+			compat_info.valid_hooks = t->valid_hooks;
+			compat_info.num_entries = private->number;
+			compat_info.size = tmp.size;
+			strncpy(compat_info.name, name, sizeof(compat_info.name));
+			pinfo = (void *)&compat_info;
+		} else
+#endif
+		{
+			ret = 0;
+			info.valid_hooks = t->valid_hooks;
+			memcpy(info.hook_entry, private->hook_entry,
+					sizeof(info.hook_entry));
+			memcpy(info.underflow, private->underflow,
+					sizeof(info.underflow));
+			info.num_entries = private->number;
+			info.size = private->size;
+			strncpy(info.name, name, sizeof(info.name));
+			pinfo = (void *)&info;
+		}
+
+		if (ret == 0 && copy_to_user(user, pinfo, *len) != 0)
+			ret = -EFAULT;
+
+		xt_table_unlock(t);
+		module_put(t->me);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+	up(&compat_ipt_mutex);
+#endif
+	return ret;
+}
+
 static int
-get_entries(const struct ipt_get_entries *entries,
-	    struct ipt_get_entries __user *uptr)
+get_entries(struct ipt_get_entries __user *uptr, int *len)
 {
 	int ret;
+	struct ipt_get_entries get;
 	struct ipt_table *t;
 
-	t = xt_find_table_lock(AF_INET, entries->name);
+	if (*len < sizeof(get)) {
+		duprintf("get_entries: %u < %d\n", *len,
+				(unsigned int)sizeof(get));
+		return -EINVAL;
+	}
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+	if (*len != sizeof(struct ipt_get_entries) + get.size) {
+		duprintf("get_entries: %u != %u\n", *len,
+				(unsigned int)(sizeof(struct ipt_get_entries) +
+				get.size));
+		return -EINVAL;
+	}
+
+	t = xt_find_table_lock(AF_INET, get.name);
 	if (t && !IS_ERR(t)) {
 		struct xt_table_info *private = t->private;
 		duprintf("t->private->number = %u\n",
 			 private->number);
-		if (entries->size == private->size)
+		if (get.size == private->size)
 			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
 		else {
 			duprintf("get_entries: I've got %u not %u!\n",
 				 private->size,
-				 entries->size);
+				 get.size);
 			ret = -EINVAL;
 		}
 		module_put(t->me);
@@ -906,79 +1308,47 @@ get_entries(const struct ipt_get_entries
 }
 
 static int
-do_replace(void __user *user, unsigned int len)
+__do_replace(const char *name, unsigned int valid_hooks,
+		struct xt_table_info *newinfo, unsigned int num_counters,
+		void __user *counters_ptr)
 {
 	int ret;
-	struct ipt_replace tmp;
 	struct ipt_table *t;
-	struct xt_table_info *newinfo, *oldinfo;
+	struct xt_table_info *oldinfo;
 	struct xt_counters *counters;
-	void *loc_cpu_entry, *loc_cpu_old_entry;
+	void *loc_cpu_old_entry;
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-		return -EFAULT;
+	ret = 0;
+	counters = ub_vmalloc_best(num_counters * sizeof(struct xt_counters));
+	if (!counters) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (len != sizeof(tmp) + tmp.size)
-		return -ENOPROTOOPT;
-
-	/* overflow check */
-	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
-			SMP_CACHE_BYTES)
-		return -ENOMEM;
-	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
-		return -ENOMEM;
-
-	newinfo = xt_alloc_table_info(tmp.size);
-	if (!newinfo)
-		return -ENOMEM;
-
-	/* choose the copy that is our node/cpu */
-	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
-	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
-			   tmp.size) != 0) {
-		ret = -EFAULT;
-		goto free_newinfo;
-	}
-
-	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
-	if (!counters) {
-		ret = -ENOMEM;
-		goto free_newinfo;
-	}
-
-	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
-			      tmp.hook_entry, tmp.underflow);
-	if (ret != 0)
-		goto free_newinfo_counters;
-
-	duprintf("ip_tables: Translated table\n");
-
-	t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
-				    "iptable_%s", tmp.name);
-	if (!t || IS_ERR(t)) {
-		ret = t ? PTR_ERR(t) : -ENOENT;
-		goto free_newinfo_counters_untrans;
-	}
+	t = try_then_request_module(xt_find_table_lock(AF_INET, name),
+				    "iptable_%s", name);
+	if (!t || IS_ERR(t)) {
+		ret = t ? PTR_ERR(t) : -ENOENT;
+		goto free_newinfo_counters_untrans;
+	}
 
 	/* You lied! */
-	if (tmp.valid_hooks != t->valid_hooks) {
+	if (valid_hooks != t->valid_hooks) {
 		duprintf("Valid hook crap: %08X vs %08X\n",
-			 tmp.valid_hooks, t->valid_hooks);
+			 valid_hooks, t->valid_hooks);
 		ret = -EINVAL;
 		goto put_module;
 	}
 
-	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
 	/* Update module usage count based on number of rules */
 	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
 		oldinfo->number, oldinfo->initial_entries, newinfo->number);
-	if ((oldinfo->number > oldinfo->initial_entries) || 
-	    (newinfo->number <= oldinfo->initial_entries)) 
+	if ((oldinfo->number > oldinfo->initial_entries) ||
+	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 	if ((oldinfo->number > oldinfo->initial_entries) &&
 	    (newinfo->number <= oldinfo->initial_entries))
@@ -990,8 +1360,8 @@ do_replace(void __user *user, unsigned i
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
 	xt_free_table_info(oldinfo);
-	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
+	if (copy_to_user(counters_ptr, counters,
+			 sizeof(struct xt_counters) * num_counters) != 0)
-		ret = -EFAULT;
+		ret = 0;	/* table already live: an error makes callers free it */
 	vfree(counters);
 	xt_table_unlock(t);
@@ -1001,9 +1371,62 @@ do_replace(void __user *user, unsigned i
 	module_put(t->me);
 	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
- free_newinfo_counters:
 	vfree(counters);
+ out:
+	return ret;
+}
+
+/* IPT_SO_SET_REPLACE, native layout: copy in header + rule blob, translate
+ * it and install it via __do_replace(); newinfo is freed here on failure. */
+static int
+do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct ipt_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+	/* name is used as a C string (table lookup, "iptable_%s"): force a NUL */
+	tmp.name[sizeof(tmp.name) - 1] = '\0';
+
+	/* Hack: Causes ipchains to give correct error msg --RR */
+	if (len != sizeof(tmp) + tmp.size)
+		return -ENOPROTOOPT;
+
+	/* overflow check */
+	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
+			SMP_CACHE_BYTES)
+		return -ENOMEM;
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+	/* choose the copy that is our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+	duprintf("ip_tables: Translated table\n");
+	ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, tmp.counters);
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1035,28 +1458,56 @@ static int
 do_add_counters(void __user *user, unsigned int len)
 {
 	unsigned int i;
-	struct xt_counters_info tmp, *paddc;
+	struct xt_counters_info tmp;
+	struct xt_counters *paddc;
+	unsigned int num_counters;
+	char *name;
+	int size;
+	void *ptmp;
 	struct ipt_table *t;
 	struct xt_table_info *private;
 	int ret = 0;
 	void *loc_cpu_entry;
+#ifdef CONFIG_COMPAT
+	struct compat_xt_counters_info compat_tmp;
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+	if (is_current_32bits()) {
+		ptmp = &compat_tmp;
+		size = sizeof(struct compat_xt_counters_info);
+	} else
+#endif
+	{
+		ptmp = &tmp;
+		size = sizeof(struct xt_counters_info);
+	}
+
+	if (copy_from_user(ptmp, user, size) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
+#ifdef CONFIG_COMPAT
+	if (is_current_32bits()) {
+		num_counters = compat_tmp.num_counters;
+		name = compat_tmp.name;
+	} else
+#endif
+	{
+		num_counters = tmp.num_counters;
+		name = tmp.name;
+	}
+
+	if (len != size + num_counters * sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc_node(len, numa_node_id());
+	paddc = ub_vmalloc_node(len - size, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
-	if (copy_from_user(paddc, user, len) != 0) {
+	if (copy_from_user(paddc, user + size, len - size) != 0) {
 		ret = -EFAULT;
 		goto free;
 	}
 
-	t = xt_find_table_lock(AF_INET, tmp.name);
+	t = xt_find_table_lock(AF_INET, name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
@@ -1064,7 +1515,7 @@ do_add_counters(void __user *user, unsig
 
 	write_lock_bh(&t->lock);
 	private = t->private;
-	if (private->number != tmp.num_counters) {
+	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
@@ -1075,7 +1526,7 @@ do_add_counters(void __user *user, unsig
 	IPT_ENTRY_ITERATE(loc_cpu_entry,
 			  private->size,
 			  add_counter_to_entry,
-			  paddc->counters,
+			  paddc,
 			  &i);
  unlock_up_free:
 	write_unlock_bh(&t->lock);
@@ -1087,14 +1538,591 @@ do_add_counters(void __user *user, unsig
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_ipt_replace {	/* header compat_do_replace() copies in from user */
+	char			name[IPT_TABLE_MAXNAMELEN];
+	u32			valid_hooks;
+	u32			num_entries;	/* passed to translation as entry count */
+	u32			size;		/* bytes after header; len == hdr + size */
+	u32			hook_entry[NF_IP_NUMHOOKS];
+	u32			underflow[NF_IP_NUMHOOKS];
+	u32			num_counters;	/* element count of counters buffer */
+	compat_uptr_t		counters;	/* struct ipt_counters *; via compat_ptr() */
+	struct compat_ipt_entry	entries[0];
+};
+
+static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
+		void __user **dstptr, compat_uint_t *size)
+{
+	/* Propagate the ->compat() result so a failed user copy is not lost. */
+	if (m->u.kernel.match->compat)
+		return m->u.kernel.match->compat(m, dstptr, size,
+						  COMPAT_TO_USER);
+	if (__copy_to_user(*dstptr, m, m->u.match_size))
+		return -EFAULT;
+	*dstptr += m->u.match_size;
+	return 0;
+}
+
+static int compat_copy_entry_to_user(struct ipt_entry *e,
+		void __user **dstptr, compat_uint_t *size)
+{
+	struct ipt_entry_target __user *t;
+	struct compat_ipt_entry __user *ce;
+	u_int16_t target_offset, next_offset;
+	compat_uint_t origsize;
+	int ret;
+
+	ret = -EFAULT;
+	origsize = *size;
+	ce = (struct compat_ipt_entry __user *)*dstptr;
+	if (__copy_to_user(ce, e, sizeof(struct ipt_entry)))
+		goto out;
+
+	*dstptr += sizeof(struct compat_ipt_entry);
+	ret = IPT_MATCH_ITERATE(e, compat_copy_match_to_user, dstptr, size);
+	target_offset = e->target_offset - (origsize - *size);
+	if (ret)
+		goto out;
+	t = ipt_get_target(e);
+	if (t->u.kernel.target->compat) {
+		ret = t->u.kernel.target->compat(t,
+				dstptr, size, COMPAT_TO_USER);
+		if (ret)
+			goto out;
+	} else {
+		ret = -EFAULT;
+		if (__copy_to_user(*dstptr, t, t->u.target_size))
+			goto out;
+		*dstptr += t->u.target_size;
+	}
+	ret = -EFAULT;
+	next_offset = e->next_offset - (origsize - *size);
+	if (__put_user(target_offset, &ce->target_offset))
+		goto out;
+	if (__put_user(next_offset, &ce->next_offset))
+		goto out;
+	return 0;
+out:
+	return ret;
+}
+
+static inline int
+compat_check_calc_match(struct ipt_entry_match *m,
+	    const char *name,
+	    const struct ipt_ip *ip,
+	    unsigned int hookmask,
+	    int *size, int *i)
+{
+	struct ipt_match *match;
+
+	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
+						   m->u.user.revision),
+					"ipt_%s", m->u.user.name);
+	if (IS_ERR(match) || !match) {
+		duprintf("compat_check_calc_match: `%s' not found\n",
+				m->u.user.name);
+		return match ? PTR_ERR(match) : -ENOENT;
+	}
+	m->u.kernel.match = match;
+
+	if (m->u.kernel.match->compat)
+		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
+
+	(*i)++;
+	return 0;
+}
+
+/* Compat pass over one user-supplied entry: validate it, look up its matches
+ * and target, and record its size delta (off) via compat_add_offset(). */
+static inline int
+check_compat_entry_size_and_hooks(struct ipt_entry *e,
+			   struct xt_table_info *newinfo,
+			   unsigned int *size,
+			   unsigned char *base,
+			   unsigned char *limit,
+			   unsigned int *hook_entries,
+			   unsigned int *underflows,
+			   unsigned int *i,
+			   const char *name)
+{
+	struct ipt_entry_target *t;
+	struct ipt_target *target;
+	unsigned int entry_offset;
+	int ret, off, h, j;
+
+	duprintf("check_compat_entry_size_and_hooks %p\n", e);
+	if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
+	    || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
+		duprintf("Bad offset %p, limit = %p\n", e, limit);
+		return -EINVAL;
+	}
+
+	if (e->next_offset < sizeof(struct compat_ipt_entry) +
+			sizeof(struct compat_ipt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	ret = check_entry(e, name);
+	if (ret)
+		return ret;
+	off = 0;
+	entry_offset = (void *)e - (void *)base;
+	j = 0;
+	ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
+			e->comefrom, &off, &j);
+	if (ret != 0)
+		goto out;
+
+	t = ipt_get_target(e);
+	target = try_then_request_module(xt_find_target(AF_INET,
+						     t->u.user.name,
+						     t->u.user.revision),
+					 "ipt_%s", t->u.user.name);
+	if (IS_ERR(target) || !target) {
+		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+							t->u.user.name);
+		ret = target ? PTR_ERR(target) : -ENOENT;
+		goto out;
+	}
+	t->u.kernel.target = target;
+	if (t->u.kernel.target->compat)
+		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
+	*size += off;
+	ret = compat_add_offset(entry_offset, off);
+	if (ret)
+		goto put_target;
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+	/* Clear counters and comefrom */
+	e->counters = ((struct ipt_counters) { 0, 0 });
+	e->comefrom = 0;
+	(*i)++;
+	return 0;
+put_target:
+	module_put(target->me);	/* entry is not counted in *i: drop the ref here */
+out:
+	IPT_MATCH_ITERATE(e, cleanup_match, &j);
+	return ret;
+}
+
+static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
+	void **dstptr, compat_uint_t *size)
+{
+	struct ipt_match *match;
+
+	match = m->u.kernel.match;
+	if (match->compat)
+		match->compat(m, dstptr, size, COMPAT_FROM_USER);
+	else {
+		memcpy(*dstptr, m, m->u.match_size);
+		*dstptr += m->u.match_size;
+	}
+
+	return 0;
+}
+
+static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
+	unsigned int *size, const char *name,
+	struct xt_table_info *newinfo, unsigned char *base)
+{
+	struct ipt_entry_target *t;
+	struct ipt_entry *de;
+	unsigned int origsize;
+	int ret, h;
+
+	ret = 0;
+	origsize = *size;
+	de = (struct ipt_entry *)*dstptr;
+	memcpy(de, e, sizeof(struct ipt_entry));
+
+	*dstptr += sizeof(struct compat_ipt_entry);
+	ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size);
+	if (ret)
+		return ret;
+	de->target_offset = e->target_offset - (origsize - *size);
+	t = ipt_get_target(e);
+	if (t->u.kernel.target->compat)
+		t->u.kernel.target->compat(t,
+				dstptr, size, COMPAT_FROM_USER);
+	else {
+		memcpy(*dstptr, t, t->u.target_size);
+		*dstptr += t->u.target_size;
+	}
+
+	de->next_offset = e->next_offset - (origsize - *size);
+	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+		if ((unsigned char *)de - base < newinfo->hook_entry[h])
+			newinfo->hook_entry[h] -= origsize - *size;
+		if ((unsigned char *)de - base < newinfo->underflow[h])
+			newinfo->underflow[h] -= origsize - *size;
+	}
+
+	return ret;
+}
+
+static inline int check_entry_data(struct ipt_entry *e, const char *name)
+{
+	int ret;
+	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom);
+	if (ret != 0)
+		return ret;
+
+	return check_target(e, name);
+}
+
+static int
+translate_compat_table(const char *name,
+		unsigned int valid_hooks,
+		struct xt_table_info **pinfo,
+		void **pentry0,
+		unsigned int total_size,
+		unsigned int number,
+		unsigned int *hook_entries,
+		unsigned int *underflows)
+{
+	unsigned int i, j;
+	struct xt_table_info *newinfo, *info;
+	void *pos, *entry0, *entry1;
+	unsigned int size;
+	int ret;
+
+	info = *pinfo;
+	entry0 = *pentry0;
+	size = total_size;
+	info->number = number;
+
+	/* Init all hooks to impossible value. */
+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+		info->hook_entry[i] = 0xFFFFFFFF;
+		info->underflow[i] = 0xFFFFFFFF;
+	}
+
+	duprintf("translate_compat_table: size %u\n", info->size);
+	i = 0;
+	down(&compat_ipt_mutex);
+	/* Walk through entries, checking offsets. */
+	ret = IPT_ENTRY_ITERATE(entry0, total_size,
+				check_compat_entry_size_and_hooks,
+				info, &size, entry0,
+				entry0 + total_size,
+				hook_entries, underflows, &i, name);
+	if (ret != 0)
+		goto out_unlock;
+
+	ret = -EINVAL;
+	if (i != number) {
+		duprintf("translate_compat_table: %u not %u entries\n",
+			 i, number);
+		goto out_unlock;
+	}
+
+	/* Check hooks all assigned */
+	for (j = 0; j < NF_IP_NUMHOOKS; j++) {
+		/* Only hooks which are valid */
+		if (!(valid_hooks & (1 << j)))
+			continue;
+		if (info->hook_entry[j] == 0xFFFFFFFF) {
+			duprintf("Invalid hook entry %u %u\n",
+				 j, hook_entries[j]);
+			goto out_unlock;
+		}
+		if (info->underflow[j] == 0xFFFFFFFF) {
+			duprintf("Invalid underflow %u %u\n",
+				 j, underflows[j]);
+			goto out_unlock;
+		}
+	}
+
+	ret = -ENOMEM;
+	newinfo = xt_alloc_table_info(size);
+	if (!newinfo)
+		goto out_unlock;
+
+	newinfo->number = number;
+	for (j = 0; j < NF_IP_NUMHOOKS; j++) {
+		newinfo->hook_entry[j] = info->hook_entry[j];
+		newinfo->underflow[j] = info->underflow[j];
+	}
+	entry1 = newinfo->entries[raw_smp_processor_id()];
+	pos = entry1;
+	size =  total_size;
+	ret = IPT_ENTRY_ITERATE(entry0, total_size,
+			compat_copy_entry_from_user, &pos, &size,
+			name, newinfo, entry1);
+	compat_flush_offsets();
+	up(&compat_ipt_mutex);
+	if (ret)
+		goto free_newinfo;
+
+	ret = -ELOOP;
+	if (!mark_source_chains(newinfo, valid_hooks, entry1))
+		goto free_newinfo;
+
+	ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, check_entry_data, name);
+	if (ret)
+		goto free_newinfo;
+
+	/* And one copy for every other CPU */
+	for_each_cpu(i)
+		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+			memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+	*pinfo = newinfo;
+	*pentry0 = entry1;
+	xt_free_table_info(info);
+	return 0;
+
+free_newinfo:
+	xt_free_table_info(newinfo);
+out:
+	IPT_ENTRY_ITERATE(entry0, total_size, cleanup_entry, &i);
+	return ret;
+out_unlock:
+	compat_flush_offsets();
+	up(&compat_ipt_mutex);
+	goto out;
+}
+
+/* IPT_SO_SET_REPLACE for 32-bit callers: copy in the compat header and rule
+ * blob, convert it with translate_compat_table(), install via __do_replace(). */
+static int
+compat_do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct compat_ipt_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+	/* name is used as a C string (table lookup, "iptable_%s"): force a NUL */
+	tmp.name[sizeof(tmp.name) - 1] = '\0';
+
+	/* Hack: Causes ipchains to give correct error msg --RR */
+	if (len != sizeof(tmp) + tmp.size)
+		return -ENOPROTOOPT;
+
+	/* overflow check */
+	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
+			SMP_CACHE_BYTES)
+		return -ENOMEM;
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+	/* choose the copy that is our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+			      &newinfo, &loc_cpu_entry, tmp.size,
+			      tmp.num_entries, tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+	duprintf("compat_do_replace: Translated table\n");
+	ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, compat_ptr(tmp.counters));
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
+ free_newinfo:
+	xt_free_table_info(newinfo);
+	return ret;
+}
+
+struct compat_ipt_get_entries	/* request header read by compat_get_entries() */
+{
+	char name[IPT_TABLE_MAXNAMELEN];	/* table to look up */
+	compat_uint_t size;	/* must equal the size from compat_table_info() */
+	struct compat_ipt_entry entrytable[0];	/* entries are written here */
+};
+
+static int compat_copy_entries_to_user(unsigned int total_size,
+		     struct ipt_table *table, void __user *userptr)
+{
+	unsigned int off, num;
+	struct compat_ipt_entry e;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
+	void __user *pos;
+	unsigned int size;
+	int ret = 0;
+	void *loc_cpu_entry;
+
+	counters = alloc_counters(table);	/* released at free_counters */
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
+	/* choose the copy that is on our node/cpu, ...
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu)
+	 */
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	pos = userptr;
+	size = total_size;	/* callbacks adjust it; presumably ends as compat size */
+	ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
+			compat_copy_entry_to_user, &pos, &size);
+	if (ret)
+		goto free_counters;
+
+	/* ... then go back and fix counters and names */
+	for (off = 0, num = 0; off < size; off += e.next_offset, num++) {
+		unsigned int i;
+		struct ipt_entry_match m;
+		struct ipt_entry_target t;
+
+		ret = -EFAULT;	/* default for every failing copy below */
+		if (copy_from_user(&e, userptr + off,
+					sizeof(struct compat_ipt_entry)))
+			goto free_counters;
+		if (copy_to_user(userptr + off +
+			offsetof(struct compat_ipt_entry, counters),
+			 &counters[num], sizeof(counters[num])))
+			goto free_counters;
+
+		/* NOTE(review): loop step m.u.match_size is read back from user memory */
+		for (i = sizeof(struct compat_ipt_entry);
+				i < e.target_offset; i += m.u.match_size) {
+			if (copy_from_user(&m, userptr + off + i,
+					sizeof(struct ipt_entry_match)))
+				goto free_counters;
+			if (copy_to_user(userptr + off + i +
+				offsetof(struct ipt_entry_match, u.user.name),
+				m.u.kernel.match->name,	/* NOTE(review): ptr re-read from user copy */
+				strlen(m.u.kernel.match->name) + 1))
+				goto free_counters;
+		}
+
+		if (copy_from_user(&t, userptr + off + e.target_offset,
+					sizeof(struct ipt_entry_target)))
+			goto free_counters;
+		if (copy_to_user(userptr + off + e.target_offset +
+			offsetof(struct ipt_entry_target, u.user.name),
+			t.u.kernel.target->name,	/* NOTE(review): ptr re-read from user copy */
+			strlen(t.u.kernel.target->name) + 1))
+			goto free_counters;
+	}
+	ret = 0;
+free_counters:
+	vfree(counters);
+	return ret;
+}
+
+/* IPT_SO_GET_ENTRIES for 32-bit callers: validate the request header, then
+ * dump the named table in compat layout into uptr->entrytable. */
+static int
+compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
+{
+	int ret;
+	struct compat_ipt_get_entries get;
+	struct ipt_table *t;
+
+	if (*len < sizeof(get)) {
+		duprintf("compat_get_entries: %u < %u\n",
+				*len, (unsigned int)sizeof(get));
+		return -EINVAL;
+	}
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+	get.name[sizeof(get.name) - 1] = '\0';	/* name is used as a C string */
+	if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
+		duprintf("compat_get_entries: %u != %u\n", *len,
+			(unsigned int)(sizeof(struct compat_ipt_get_entries) +
+			get.size));
+		return -EINVAL;
+	}
+
+	down(&compat_ipt_mutex);
+	t = xt_find_table_lock(AF_INET, get.name);
+	if (t && !IS_ERR(t)) {
+		struct xt_table_info *private = t->private;
+		struct xt_table_info info;
+		duprintf("t->private->number = %u\n",
+			 private->number);
+		ret = compat_table_info(private, &info);
+		if (!ret && get.size == info.size) {
+			ret = compat_copy_entries_to_user(private->size,
+						   t, uptr->entrytable);
+		} else if (!ret) {
+			duprintf("compat_get_entries: I've got %u not %u!\n",
+				 info.size,
+				 get.size);
+			ret = -EINVAL;
+		}
+		compat_flush_offsets();
+		module_put(t->me);
+		xt_table_unlock(t);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+
+	up(&compat_ipt_mutex);
+	return ret;
+}
+
+static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
+
+static int
+compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
+		return -EPERM;
+
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET].next)
+		return -ENOENT;
+#endif
+
+	switch (cmd) {
+	case IPT_SO_GET_INFO:
+		ret = get_info(user, len);
+		break;
+	case IPT_SO_GET_ENTRIES:
+		ret = compat_get_entries(user, len);
+		break;
+	default:
+		ret = do_ipt_get_ctl(sk, cmd, user, len);
+	}
+	return ret;
+}
+#endif
+
 static int
 do_ipt_set_ctl(struct sock *sk,	int cmd, void __user *user, unsigned int len)
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET].next)
+		return -ENOENT;
+#endif
+
+#ifdef CONFIG_COMPAT
+	if (is_current_32bits() && (cmd == IPT_SO_SET_REPLACE))
+		return compat_do_replace(user, len);
+#endif
+
 	switch (cmd) {
 	case IPT_SO_SET_REPLACE:
 		ret = do_replace(user, len);
@@ -1117,69 +2145,27 @@ do_ipt_get_ctl(struct sock *sk, int cmd,
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
-	switch (cmd) {
-	case IPT_SO_GET_INFO: {
-		char name[IPT_TABLE_MAXNAMELEN];
-		struct ipt_table *t;
-
-		if (*len != sizeof(struct ipt_getinfo)) {
-			duprintf("length %u != %u\n", *len,
-				 sizeof(struct ipt_getinfo));
-			ret = -EINVAL;
-			break;
-		}
-
-		if (copy_from_user(name, user, sizeof(name)) != 0) {
-			ret = -EFAULT;
-			break;
-		}
-		name[IPT_TABLE_MAXNAMELEN-1] = '\0';
-
-		t = try_then_request_module(xt_find_table_lock(AF_INET, name),
-					    "iptable_%s", name);
-		if (t && !IS_ERR(t)) {
-			struct ipt_getinfo info;
-			struct xt_table_info *private = t->private;
-
-			info.valid_hooks = t->valid_hooks;
-			memcpy(info.hook_entry, private->hook_entry,
-			       sizeof(info.hook_entry));
-			memcpy(info.underflow, private->underflow,
-			       sizeof(info.underflow));
-			info.num_entries = private->number;
-			info.size = private->size;
-			memcpy(info.name, name, sizeof(info.name));
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET].next)
+		return -ENOENT;
+#endif
 
-			if (copy_to_user(user, &info, *len) != 0)
-				ret = -EFAULT;
-			else
-				ret = 0;
-			xt_table_unlock(t);
-			module_put(t->me);
-		} else
-			ret = t ? PTR_ERR(t) : -ENOENT;
-	}
-	break;
+#ifdef CONFIG_COMPAT	/* only GET_ENTRIES differs; compat default re-enters us */
+	if (is_current_32bits() && cmd == IPT_SO_GET_ENTRIES)
+		return compat_do_ipt_get_ctl(sk, cmd, user, len);
+#endif
 
-	case IPT_SO_GET_ENTRIES: {
-		struct ipt_get_entries get;
+	switch (cmd) {
+	case IPT_SO_GET_INFO:
+		ret = get_info(user, len);
+		break;
 
-		if (*len < sizeof(get)) {
-			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
-			ret = -EINVAL;
-		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
-			ret = -EFAULT;
-		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
-			duprintf("get_entries: %u != %u\n", *len,
-				 sizeof(struct ipt_get_entries) + get.size);
-			ret = -EINVAL;
-		} else
-			ret = get_entries(&get, user);
+	case IPT_SO_GET_ENTRIES:
+		ret = get_entries(user, len);
 		break;
-	}
 
 	case IPT_SO_GET_REVISION_MATCH:
 	case IPT_SO_GET_REVISION_TARGET: {
@@ -1215,17 +2201,18 @@ do_ipt_get_ctl(struct sock *sk, int cmd,
 	return ret;
 }
 
-int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
+struct xt_table *ipt_register_table(struct xt_table *table,
+		const struct ipt_replace *repl)
 {
 	int ret;
 	struct xt_table_info *newinfo;
 	static struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+		= { 0, 0, 0, 0, { 0 }, { 0 }, { } };
 	void *loc_cpu_entry;
 
 	newinfo = xt_alloc_table_info(repl->size);
 	if (!newinfo)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* choose the copy on our node/cpu
 	 * but dont care of preemption
@@ -1240,27 +2227,30 @@ int ipt_register_table(struct xt_table *
 			      repl->underflow);
 	if (ret != 0) {
 		xt_free_table_info(newinfo);
-		return ret;
+		return ERR_PTR(ret);
 	}
 
-	if (xt_register_table(table, &bootstrap, newinfo) != 0) {
+	table = virt_xt_register_table(table, &bootstrap, newinfo);
+	if (IS_ERR(table))
 		xt_free_table_info(newinfo);
-		return ret;
-	}
 
-	return 0;
+	return table;
 }
 
 void ipt_unregister_table(struct ipt_table *table)
 {
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
+	struct module *me;
 
- 	private = xt_unregister_table(table);
+	me = table->me;
+ 	private = virt_xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+	if (private->number > private->initial_entries)
+		module_put(me);
 	xt_free_table_info(private);
 }
 
@@ -1328,6 +2318,9 @@ icmp_checkentry(const char *tablename,
 /* The built-in targets: standard (NULL) and error. */
 static struct ipt_target ipt_standard_target = {
 	.name		= IPT_STANDARD_TARGET,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat_ipt_standard_fn,
+#endif
 };
 
 static struct ipt_target ipt_error_target = {
@@ -1349,43 +2342,91 @@ static struct ipt_match icmp_matchstruct
 	.name		= "icmp",
 	.match		= &icmp_match,
 	.checkentry	= &icmp_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &icmp_compat,
+#endif
 };
 
+static int init_iptables(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+	if (get_exec_env()->_xt_tables[AF_INET].next != NULL)
+		return -EEXIST;
+#endif
+
+	return xt_proto_init(AF_INET);
+}
+
+static void fini_iptables(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+	get_exec_env()->_xt_tables[AF_INET].next = NULL;
+#endif
+	xt_proto_fini(AF_INET);
+}
+
 static int __init init(void)
 {
 	int ret;
 
-	xt_proto_init(AF_INET);
+	ret = init_iptables();
+	if (ret)
+		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
-	xt_register_target(AF_INET, &ipt_standard_target);
-	xt_register_target(AF_INET, &ipt_error_target);
-	xt_register_match(AF_INET, &icmp_matchstruct);
+	ret = xt_register_target(AF_INET, &ipt_standard_target);
+	if (ret < 0)
+		goto err2;
+	ret = xt_register_target(AF_INET, &ipt_error_target);
+	if (ret < 0)
+		goto err3;
+	ret = xt_register_match(AF_INET, &icmp_matchstruct);
+	if (ret < 0)
+		goto err4;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ipt_sockopts);
 	if (ret < 0) {
 		duprintf("Unable to register sockopts.\n");
-		return ret;
+		goto err5;
 	}
 
+	KSYMRESOLVE(init_iptables);
+	KSYMRESOLVE(fini_iptables);
+	KSYMMODRESOLVE(ip_tables);
 	printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
+
+err5:
+	xt_unregister_match(AF_INET, &icmp_matchstruct);
+err4:
+	xt_unregister_target(AF_INET, &ipt_error_target);
+err3:
+	xt_unregister_target(AF_INET, &ipt_standard_target);
+err2:
+	fini_iptables();
+err1:
+	return ret;
 }
 
 static void __exit fini(void)
 {
+	KSYMMODUNRESOLVE(ip_tables);
+	KSYMUNRESOLVE(init_iptables);
+	KSYMUNRESOLVE(fini_iptables);
 	nf_unregister_sockopt(&ipt_sockopts);
-
 	xt_unregister_match(AF_INET, &icmp_matchstruct);
 	xt_unregister_target(AF_INET, &ipt_error_target);
 	xt_unregister_target(AF_INET, &ipt_standard_target);
-
-	xt_proto_fini(AF_INET);
+	fini_iptables();
 }
 
 EXPORT_SYMBOL(ipt_register_table);
 EXPORT_SYMBOL(ipt_unregister_table);
 EXPORT_SYMBOL(ipt_do_table);
-module_init(init);
+#ifdef CONFIG_COMPAT
+EXPORT_SYMBOL(ipt_match_align_compat);
+EXPORT_SYMBOL(ipt_target_align_compat);
+#endif
+subsys_initcall(init);
 module_exit(fini);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_LOG.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_LOG.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_LOG.c	2007-08-24 19:28:15.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_LOG.c	2007-08-28 17:35:32.000000000 +0400
@@ -51,32 +51,32 @@ static void dump_packet(const struct nf_
 
 	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
 	if (ih == NULL) {
-		printk("TRUNCATED");
+		ve_printk(VE_LOG, "TRUNCATED");
 		return;
 	}
 
 	/* Important fields:
 	 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
 	/* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
-	printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
+	ve_printk(VE_LOG, "SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
 	       NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
 
 	/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
-	printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+	ve_printk(VE_LOG, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
 	       ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
 	       ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
 
 	/* Max length: 6 "CE DF MF " */
 	if (ntohs(ih->frag_off) & IP_CE)
-		printk("CE ");
+		ve_printk(VE_LOG, "CE ");
 	if (ntohs(ih->frag_off) & IP_DF)
-		printk("DF ");
+		ve_printk(VE_LOG, "DF ");
 	if (ntohs(ih->frag_off) & IP_MF)
-		printk("MF ");
+		ve_printk(VE_LOG, "MF ");
 
 	/* Max length: 11 "FRAG:65535 " */
 	if (ntohs(ih->frag_off) & IP_OFFSET)
-		printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+		ve_printk(VE_LOG, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
 
 	if ((logflags & IPT_LOG_IPOPT)
 	    && ih->ihl * 4 > sizeof(struct iphdr)) {
@@ -87,15 +87,15 @@ static void dump_packet(const struct nf_
 		op = skb_header_pointer(skb, iphoff+sizeof(_iph),
 					optsize, _opt);
 		if (op == NULL) {
-			printk("TRUNCATED");
+			ve_printk(VE_LOG, "TRUNCATED");
 			return;
 		}
 
 		/* Max length: 127 "OPT (" 15*4*2chars ") " */
-		printk("OPT (");
+		ve_printk(VE_LOG, "OPT (");
 		for (i = 0; i < optsize; i++)
-			printk("%02X", op[i]);
-		printk(") ");
+			ve_printk(VE_LOG, "%02X", op[i]);
+		ve_printk(VE_LOG, ") ");
 	}
 
 	switch (ih->protocol) {
@@ -103,7 +103,7 @@ static void dump_packet(const struct nf_
 		struct tcphdr _tcph, *th;
 
 		/* Max length: 10 "PROTO=TCP " */
-		printk("PROTO=TCP ");
+		ve_printk(VE_LOG, "PROTO=TCP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -112,41 +112,41 @@ static void dump_packet(const struct nf_
 		th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
 					sizeof(_tcph), &_tcph);
 		if (th == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u ",
+		ve_printk(VE_LOG, "SPT=%u DPT=%u ",
 		       ntohs(th->source), ntohs(th->dest));
 		/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
 		if (logflags & IPT_LOG_TCPSEQ)
-			printk("SEQ=%u ACK=%u ",
+			ve_printk(VE_LOG, "SEQ=%u ACK=%u ",
 			       ntohl(th->seq), ntohl(th->ack_seq));
 		/* Max length: 13 "WINDOW=65535 " */
-		printk("WINDOW=%u ", ntohs(th->window));
+		ve_printk(VE_LOG, "WINDOW=%u ", ntohs(th->window));
 		/* Max length: 9 "RES=0x3F " */
-		printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+		ve_printk(VE_LOG, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
 		/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
 		if (th->cwr)
-			printk("CWR ");
+			ve_printk(VE_LOG, "CWR ");
 		if (th->ece)
-			printk("ECE ");
+			ve_printk(VE_LOG, "ECE ");
 		if (th->urg)
-			printk("URG ");
+			ve_printk(VE_LOG, "URG ");
 		if (th->ack)
-			printk("ACK ");
+			ve_printk(VE_LOG, "ACK ");
 		if (th->psh)
-			printk("PSH ");
+			ve_printk(VE_LOG, "PSH ");
 		if (th->rst)
-			printk("RST ");
+			ve_printk(VE_LOG, "RST ");
 		if (th->syn)
-			printk("SYN ");
+			ve_printk(VE_LOG, "SYN ");
 		if (th->fin)
-			printk("FIN ");
+			ve_printk(VE_LOG, "FIN ");
 		/* Max length: 11 "URGP=65535 " */
-		printk("URGP=%u ", ntohs(th->urg_ptr));
+		ve_printk(VE_LOG, "URGP=%u ", ntohs(th->urg_ptr));
 
 		if ((logflags & IPT_LOG_TCPOPT)
 		    && th->doff * 4 > sizeof(struct tcphdr)) {
@@ -159,15 +159,15 @@ static void dump_packet(const struct nf_
 						iphoff+ih->ihl*4+sizeof(_tcph),
 						optsize, _opt);
 			if (op == NULL) {
-				printk("TRUNCATED");
+				ve_printk(VE_LOG, "TRUNCATED");
 				return;
 			}
 
 			/* Max length: 127 "OPT (" 15*4*2chars ") " */
-			printk("OPT (");
+			ve_printk(VE_LOG, "OPT (");
 			for (i = 0; i < optsize; i++)
-				printk("%02X", op[i]);
-			printk(") ");
+				ve_printk(VE_LOG, "%02X", op[i]);
+			ve_printk(VE_LOG, ") ");
 		}
 		break;
 	}
@@ -175,7 +175,7 @@ static void dump_packet(const struct nf_
 		struct udphdr _udph, *uh;
 
 		/* Max length: 10 "PROTO=UDP " */
-		printk("PROTO=UDP ");
+		ve_printk(VE_LOG, "PROTO=UDP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -184,13 +184,13 @@ static void dump_packet(const struct nf_
 		uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_udph), &_udph);
 		if (uh == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u LEN=%u ",
+		ve_printk(VE_LOG, "SPT=%u DPT=%u LEN=%u ",
 		       ntohs(uh->source), ntohs(uh->dest),
 		       ntohs(uh->len));
 		break;
@@ -216,7 +216,7 @@ static void dump_packet(const struct nf_
 			    [ICMP_ADDRESSREPLY] = 12 };
 
 		/* Max length: 11 "PROTO=ICMP " */
-		printk("PROTO=ICMP ");
+		ve_printk(VE_LOG, "PROTO=ICMP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -225,19 +225,19 @@ static void dump_packet(const struct nf_
 		ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
 					 sizeof(_icmph), &_icmph);
 		if (ich == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 18 "TYPE=255 CODE=255 " */
-		printk("TYPE=%u CODE=%u ", ich->type, ich->code);
+		ve_printk(VE_LOG, "TYPE=%u CODE=%u ", ich->type, ich->code);
 
 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		if (ich->type <= NR_ICMP_TYPES
 		    && required_len[ich->type]
 		    && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
@@ -246,19 +246,19 @@ static void dump_packet(const struct nf_
 		case ICMP_ECHOREPLY:
 		case ICMP_ECHO:
 			/* Max length: 19 "ID=65535 SEQ=65535 " */
-			printk("ID=%u SEQ=%u ",
+			ve_printk(VE_LOG, "ID=%u SEQ=%u ",
 			       ntohs(ich->un.echo.id),
 			       ntohs(ich->un.echo.sequence));
 			break;
 
 		case ICMP_PARAMETERPROB:
 			/* Max length: 14 "PARAMETER=255 " */
-			printk("PARAMETER=%u ",
+			ve_printk(VE_LOG, "PARAMETER=%u ",
 			       ntohl(ich->un.gateway) >> 24);
 			break;
 		case ICMP_REDIRECT:
 			/* Max length: 24 "GATEWAY=255.255.255.255 " */
-			printk("GATEWAY=%u.%u.%u.%u ",
+			ve_printk(VE_LOG, "GATEWAY=%u.%u.%u.%u ",
 			       NIPQUAD(ich->un.gateway));
 			/* Fall through */
 		case ICMP_DEST_UNREACH:
@@ -266,16 +266,16 @@ static void dump_packet(const struct nf_
 		case ICMP_TIME_EXCEEDED:
 			/* Max length: 3+maxlen */
 			if (!iphoff) { /* Only recurse once. */
-				printk("[");
+				ve_printk(VE_LOG, "[");
 				dump_packet(info, skb,
 					    iphoff + ih->ihl*4+sizeof(_icmph));
-				printk("] ");
+				ve_printk(VE_LOG, "] ");
 			}
 
 			/* Max length: 10 "MTU=65535 " */
 			if (ich->type == ICMP_DEST_UNREACH
 			    && ich->code == ICMP_FRAG_NEEDED)
-				printk("MTU=%u ", ntohs(ich->un.frag.mtu));
+				ve_printk(VE_LOG, "MTU=%u ", ntohs(ich->un.frag.mtu));
 		}
 		break;
 	}
@@ -287,26 +287,26 @@ static void dump_packet(const struct nf_
 			break;
 		
 		/* Max length: 9 "PROTO=AH " */
-		printk("PROTO=AH ");
+		ve_printk(VE_LOG, "PROTO=AH ");
 
 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_ahdr), &_ahdr);
 		if (ah == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Length: 15 "SPI=0xF1234567 " */
-		printk("SPI=0x%x ", ntohl(ah->spi));
+		ve_printk(VE_LOG, "SPI=0x%x ", ntohl(ah->spi));
 		break;
 	}
 	case IPPROTO_ESP: {
 		struct ip_esp_hdr _esph, *eh;
 
 		/* Max length: 10 "PROTO=ESP " */
-		printk("PROTO=ESP ");
+		ve_printk(VE_LOG, "PROTO=ESP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -315,25 +315,25 @@ static void dump_packet(const struct nf_
 		eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_esph), &_esph);
 		if (eh == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Length: 15 "SPI=0xF1234567 " */
-		printk("SPI=0x%x ", ntohl(eh->spi));
+		ve_printk(VE_LOG, "SPI=0x%x ", ntohl(eh->spi));
 		break;
 	}
 	/* Max length: 10 "PROTO 255 " */
 	default:
-		printk("PROTO=%u ", ih->protocol);
+		ve_printk(VE_LOG, "PROTO=%u ", ih->protocol);
 	}
 
 	/* Max length: 15 "UID=4294967295 " */
  	if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
 		read_lock_bh(&skb->sk->sk_callback_lock);
 		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
- 			printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
+ 			ve_printk(VE_LOG, "UID=%u ", skb->sk->sk_socket->file->f_uid);
 		read_unlock_bh(&skb->sk->sk_callback_lock);
 	}
 
@@ -374,7 +374,7 @@ ipt_log_packet(unsigned int pf,
 		loginfo = &default_loginfo;
 
 	spin_lock_bh(&log_lock);
-	printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
+	ve_printk(VE_LOG, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
 	       prefix,
 	       in ? in->name : "",
 	       out ? out->name : "");
@@ -384,15 +384,15 @@ ipt_log_packet(unsigned int pf,
 		struct net_device *physoutdev = skb->nf_bridge->physoutdev;
 
 		if (physindev && in != physindev)
-			printk("PHYSIN=%s ", physindev->name);
+			ve_printk(VE_LOG, "PHYSIN=%s ", physindev->name);
 		if (physoutdev && out != physoutdev)
-			printk("PHYSOUT=%s ", physoutdev->name);
+			ve_printk(VE_LOG, "PHYSOUT=%s ", physoutdev->name);
 	}
 #endif
 
 	if (in && !out) {
 		/* MAC logging for input chain only. */
-		printk("MAC=");
+		ve_printk(VE_LOG, "MAC=");
 		if (skb->dev && skb->dev->hard_header_len
 		    && skb->mac.raw != (void*)skb->nh.iph) {
 			int i, len;
@@ -400,13 +400,13 @@ ipt_log_packet(unsigned int pf,
 			len = (int)((unsigned char *)skb->nh.iph - p);
 			len = min((int)skb->dev->hard_header_len, len);
 			for (i = 0; i < len; i++,p++)
-				printk("%02x%c", *p, i==len - 1 ? ' ':':');
+				ve_printk(VE_LOG, "%02x%c", *p, i==len - 1 ? ' ':':');
 		} else
-			printk(" ");
+			ve_printk(VE_LOG, " ");
 	}
 
 	dump_packet(loginfo, skb, 0);
-	printk("\n");
+	ve_printk(VE_LOG, "\n");
 	spin_unlock_bh(&log_lock);
 }
 
@@ -463,10 +463,25 @@ static int ipt_log_checkentry(const char
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+/* LOG target compat hook: passes ipt_target_align_compat() the gap between
+ * the IPT_ALIGN and COMPAT_IPT_ALIGN sizes of struct ipt_log_info. */
+static int ipt_log_compat(void *target, void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_log_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_log_info));
+
+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_target ipt_log_reg = {
 	.name		= "LOG",
 	.target		= ipt_log_target,
 	.checkentry	= ipt_log_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_log_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
@@ -487,6 +502,7 @@ static int __init init(void)
 		 * iptables userspace would abort */
 	}
 	
+
 	return 0;
 }
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_MASQUERADE.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_MASQUERADE.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_MASQUERADE.c	2007-08-28 17:35:32.000000000 +0400
@@ -120,6 +120,7 @@ masquerade_target(struct sk_buff **pskb,
 	return ip_nat_setup_info(ct, &newrange, hooknum);
 }
 
+#if 0
 static inline int
 device_cmp(struct ip_conntrack *i, void *ifindex)
 {
@@ -175,6 +176,7 @@ static struct notifier_block masq_dev_no
 static struct notifier_block masq_inet_notifier = {
 	.notifier_call	= masq_inet_event,
 };
+#endif
 
 static struct ipt_target masquerade = {
 	.name		= "MASQUERADE",
@@ -189,12 +191,16 @@ static int __init init(void)
 
 	ret = ipt_register_target(&masquerade);
 
+#if 0
+/*	These notifiers are unnecessary and may
+	lead to oops in virtual environments */
 	if (ret == 0) {
 		/* Register for device down reports */
 		register_netdevice_notifier(&masq_dev_notifier);
 		/* Register IP address change reports */
 		register_inetaddr_notifier(&masq_inet_notifier);
 	}
+#endif
 
 	return ret;
 }
@@ -202,8 +208,8 @@ static int __init init(void)
 static void __exit fini(void)
 {
 	ipt_unregister_target(&masquerade);
-	unregister_netdevice_notifier(&masq_dev_notifier);
-	unregister_inetaddr_notifier(&masq_inet_notifier);	
+	/* masq_dev_notifier/masq_inet_notifier are never registered
+	 * (see the #if 0 block in init()), so nothing to unregister. */
 }
 
 module_init(init);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_REDIRECT.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_REDIRECT.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_REDIRECT.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_REDIRECT.c	2007-08-28 17:35:32.000000000 +0400
@@ -94,8 +94,14 @@ redirect_target(struct sk_buff **pskb,
 		
 		rcu_read_lock();
 		indev = __in_dev_get_rcu((*pskb)->dev);
-		if (indev && (ifa = indev->ifa_list))
+		if (indev && (ifa = indev->ifa_list)) {
+			/* venet-specific: when the first ifa is a loopback
+			 * address, use the second ifa in the list instead */
+			if (IN_LOOPBACK(ntohl(ifa->ifa_local)) &&
+					ifa->ifa_next)
+				ifa = ifa->ifa_next;
 			newdst = ifa->ifa_local;
+		}
 		rcu_read_unlock();
 
 		if (!newdst)
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_REJECT.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_REJECT.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_REJECT.c	2007-08-28 17:35:32.000000000 +0400
@@ -307,7 +307,7 @@ static int check(const char *tablename,
 	}
 
 	if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
-		printk("REJECT: ECHOREPLY no longer supported.\n");
+		ve_printk(VE_LOG, "REJECT: ECHOREPLY no longer supported.\n");
 		return 0;
 	} else if (rejinfo->with == IPT_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
@@ -321,10 +321,25 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+/* REJECT target compat hook: passes ipt_target_align_compat() the gap between
+ * the IPT_ALIGN and COMPAT_IPT_ALIGN sizes of struct ipt_reject_info. */
+static int compat(void *target, void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_reject_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_reject_info));
+
+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_target ipt_reject_reg = {
 	.name		= "REJECT",
 	.target		= reject,
 	.checkentry	= check,
+#ifdef CONFIG_COMPAT
+	.compat		= compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_TCPMSS.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_TCPMSS.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_TCPMSS.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_TCPMSS.c	2007-08-28 17:35:32.000000000 +0400
@@ -229,7 +229,8 @@ ipt_tcpmss_checkentry(const char *tablen
 			((hook_mask & ~((1 << NF_IP_FORWARD)
 			   	| (1 << NF_IP_LOCAL_OUT)
 			   	| (1 << NF_IP_POST_ROUTING))) != 0)) {
-		printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+		ve_printk(VE_LOG, "TCPMSS: path-MTU clamping only supported in"
+				" FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return 0;
 	}
 
@@ -238,14 +239,29 @@ ipt_tcpmss_checkentry(const char *tablen
 	    && IPT_MATCH_ITERATE(e, find_syn_match))
 		return 1;
 
-	printk("TCPMSS: Only works on TCP SYN packets\n");
+	ve_printk(VE_LOG, "TCPMSS: Only works on TCP SYN packets\n");
 	return 0;
 }
 
+#ifdef CONFIG_COMPAT
+/* TCPMSS compat hook: size gap of ipt_tcpmss_info, IPT_ALIGN vs compat. */
+static int ipt_tcpmss_compat(void *target, void **dstptr,
+			     int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_tcpmss_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_tcpmss_info));
+
+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_target ipt_tcpmss_reg = {
 	.name		= "TCPMSS",
 	.target		= ipt_tcpmss_target,
 	.checkentry	= ipt_tcpmss_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_tcpmss_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_TOS.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_TOS.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_TOS.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_TOS.c	2007-08-28 17:35:32.000000000 +0400
@@ -76,17 +76,32 @@ checkentry(const char *tablename,
 	    && tos != IPTOS_RELIABILITY
 	    && tos != IPTOS_MINCOST
 	    && tos != IPTOS_NORMALSVC) {
-		printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
+		ve_printk(VE_LOG, KERN_WARNING "TOS: bad tos value %#x\n", tos);
 		return 0;
 	}
 
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+/* TOS target compat hook: passes ipt_target_align_compat() the gap between
+ * the IPT_ALIGN and COMPAT_IPT_ALIGN sizes of struct ipt_tos_target_info. */
+static int compat(void *target, void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_tos_target_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_target_info));
+
+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_target ipt_tos_reg = {
 	.name		= "TOS",
 	.target		= target,
 	.checkentry	= checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_multiport.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_multiport.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_multiport.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_multiport.c	2007-08-28 17:35:36.000000000 +0400
@@ -174,11 +174,36 @@ checkentry_v1(const char *tablename,
 	return (matchsize == IPT_ALIGN(sizeof(struct ipt_multiport_v1)));
 }
 
+#ifdef CONFIG_COMPAT
+/* multiport (revision 0) compat hook: passes ipt_match_align_compat() the gap
+ * between the IPT_ALIGN and COMPAT_IPT_ALIGN sizes of struct ipt_multiport. */
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_multiport)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_multiport));
+
+	return ipt_match_align_compat(match, dstptr, size, delta, convert);
+}
+
+/* multiport (revision 1) compat hook: same as compat(), but sized from
+ * struct ipt_multiport_v1. */
+static int compat_v1(void *match, void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_multiport_v1)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_multiport_v1));
+
+	return ipt_match_align_compat(match, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_match multiport_match = {
 	.name		= "multiport",
 	.revision	= 0,
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
@@ -187,10 +212,13 @@ static struct ipt_match multiport_match_
 	.revision	= 1,
 	.match		= &match_v1,
 	.checkentry	= &checkentry_v1,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat_v1,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+static int __init iptable_multiport_init(void)
 {
 	int err;
 
@@ -204,11 +232,11 @@ static int __init init(void)
 	return err;
 }
 
-static void __exit fini(void)
+static void __exit iptable_multiport_fini(void)
 {
 	ipt_unregister_match(&multiport_match);
 	ipt_unregister_match(&multiport_match_v1);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(iptable_multiport_init);
+module_exit(iptable_multiport_fini);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_owner.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_owner.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_owner.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_owner.c	2007-08-28 17:35:32.000000000 +0400
@@ -87,15 +87,15 @@ static struct ipt_match owner_match = {
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+static int __init ipt_owner_init(void)
 {
 	return ipt_register_match(&owner_match);
 }
 
-static void __exit fini(void)
+static void __exit ipt_owner_fini(void)
 {
 	ipt_unregister_match(&owner_match);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(ipt_owner_init);
+module_exit(ipt_owner_fini);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_tos.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_tos.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_tos.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_tos.c	2007-08-28 17:35:32.000000000 +0400
@@ -44,10 +44,25 @@ checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+/* tos match compat hook: passes ipt_match_align_compat() the gap between
+ * the IPT_ALIGN and COMPAT_IPT_ALIGN sizes of struct ipt_tos_info. */
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_tos_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_info));
+
+	return ipt_match_align_compat(match, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_match tos_match = {
 	.name		= "tos",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_ttl.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_ttl.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/ipt_ttl.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/ipt_ttl.c	2007-08-28 17:35:32.000000000 +0400
@@ -57,23 +57,37 @@ static int checkentry(const char *tablen
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+/* ttl match compat hook: passes ipt_match_align_compat() the gap between
+ * the IPT_ALIGN and COMPAT_IPT_ALIGN sizes of struct ipt_ttl_info. */
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int delta = IPT_ALIGN(sizeof(struct ipt_ttl_info)) -
+		COMPAT_IPT_ALIGN(sizeof(struct ipt_ttl_info));
+
+	return ipt_match_align_compat(match, dstptr, size, delta, convert);
+}
+#endif
+
 static struct ipt_match ttl_match = {
 	.name		= "ttl",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+static int __init ipt_ttl_init(void)
 {
 	return ipt_register_match(&ttl_match);
 }
 
-static void __exit fini(void)
+static void __exit ipt_ttl_fini(void)
 {
 	ipt_unregister_match(&ttl_match);
-
 }
 
-module_init(init);
-module_exit(fini);
+module_init(ipt_ttl_init);
+module_exit(ipt_ttl_fini);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/iptable_filter.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/iptable_filter.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/iptable_filter.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/iptable_filter.c	2007-08-28 17:35:32.000000000 +0400
@@ -12,12 +12,20 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables filter table");
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_packet_filter	(get_exec_env()->_ve_ipt_filter_pf)
+#else /* !CONFIG_VE_IPTABLES: parenthesized so both variants parse alike */
+#define ve_packet_filter	(&packet_filter)
+#endif /* CONFIG_VE_IPTABLES */
+
 #define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))
 
 static struct
@@ -25,7 +33,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[3];
 	struct ipt_error term;
-} initial_table __initdata 
+} initial_table
 = { { "filter", FILTER_VALID_HOOKS, 4,
       sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
       { [NF_IP_LOCAL_IN] = 0,
@@ -90,7 +98,7 @@ ipt_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static unsigned int
@@ -108,7 +116,7 @@ ipt_local_out_hook(unsigned int hook,
 		return NF_ACCEPT;
 	}
 
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static struct nf_hook_ops ipt_ops[] = {
@@ -139,22 +147,19 @@ static struct nf_hook_ops ipt_ops[] = {
 static int forward = NF_ACCEPT;
 module_param(forward, bool, 0000);
 
-static int __init init(void)
+int init_iptable_filter(void)
 {
 	int ret;
-
-	if (forward < 0 || forward > NF_MAX_VERDICT) {
-		printk("iptables forward must be 0 or 1\n");
-		return -EINVAL;
-	}
-
-	/* Entry 1 is the FORWARD hook */
-	initial_table.entries[1].target.verdict = -forward - 1;
+	struct ipt_table *tmp_filter;
 
 	/* Register table */
-	ret = ipt_register_table(&packet_filter, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_filter = ipt_register_table(&packet_filter,
+			&initial_table.repl);
+	if (IS_ERR(tmp_filter))
+		return PTR_ERR(tmp_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = tmp_filter;
+#endif
 
 	/* Register hooks */
 	ret = nf_register_hook(&ipt_ops[0]);
@@ -176,19 +181,55 @@ static int __init init(void)
  cleanup_hook0:
 	nf_unregister_hook(&ipt_ops[0]);
  cleanup_table:
-	ipt_unregister_table(&packet_filter);
+	ipt_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
 
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_iptable_filter(void)
 {
 	unsigned int i;
 
 	for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
 		nf_unregister_hook(&ipt_ops[i]);
 
-	ipt_unregister_table(&packet_filter);
+	ipt_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
+}
+
+static int __init init(void)	/* passed to module_init() below */
+{
+	int err;
+
+	if (forward < 0 || forward > NF_MAX_VERDICT) {
+		printk("iptables forward must be 0 or 1\n");
+		return -EINVAL;
+	}
+
+	/* Entry 1 is the FORWARD hook; its verdict is derived from 'forward' */
+	initial_table.entries[1].target.verdict = -forward - 1;
+
+	err = init_iptable_filter();	/* registers the table and hooks */
+	if (err < 0)
+		return err;
+	/* NOTE(review): KSYM* macros presumably come from <linux/nfcalls.h> */
+	KSYMRESOLVE(init_iptable_filter);
+	KSYMRESOLVE(fini_iptable_filter);
+	KSYMMODRESOLVE(iptable_filter);
+	return 0;
+}
+
+static void __exit fini(void)	/* undoes init(): symbols first, table last */
+{
+	KSYMMODUNRESOLVE(iptable_filter);
+	KSYMUNRESOLVE(init_iptable_filter);
+	KSYMUNRESOLVE(fini_iptable_filter);
+	fini_iptable_filter();	/* unregisters the hooks, then the table */
+}
 
 module_init(init);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/iptable_mangle.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/iptable_mangle.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/iptable_mangle.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/iptable_mangle.c	2007-08-28 17:35:32.000000000 +0400
@@ -17,6 +17,7 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/route.h>
+#include <linux/nfcalls.h>
 #include <linux/ip.h>
 
 MODULE_LICENSE("GPL");
@@ -35,7 +36,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[5];
 	struct ipt_error term;
-} initial_table __initdata
+} initial_table
 = { { "mangle", MANGLE_VALID_HOOKS, 6,
       sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
       { [NF_IP_PRE_ROUTING] 	= 0,
@@ -112,6 +113,13 @@ static struct ipt_table packet_mangler =
 	.af		= AF_INET,
 };
 
+#ifdef CONFIG_VE_IPTABLES
+#include <linux/sched.h>
+#define ve_packet_mangler	(get_exec_env()->_ipt_mangle_table)
+#else /* !CONFIG_VE_IPTABLES: parenthesized so both variants parse alike */
+#define ve_packet_mangler	(&packet_mangler)
+#endif /* CONFIG_VE_IPTABLES */
+
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ipt_route_hook(unsigned int hook,
@@ -120,7 +128,7 @@ ipt_route_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	return ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
 }
 
 static unsigned int
@@ -149,7 +157,8 @@ ipt_local_hook(unsigned int hook,
 	daddr = (*pskb)->nh.iph->daddr;
 	tos = (*pskb)->nh.iph->tos;
 
-	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	ret = ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
+
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
 	    && ((*pskb)->nh.iph->saddr != saddr
@@ -201,14 +210,19 @@ static struct nf_hook_ops ipt_ops[] = {
 	},
 };
 
-static int __init init(void)
+static int mangle_init(struct nf_hook_ops ipt_ops[])
 {
 	int ret;
+	struct ipt_table *tmp_mangler;
 
 	/* Register table */
-	ret = ipt_register_table(&packet_mangler, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_mangler = ipt_register_table(&packet_mangler,
+			&initial_table.repl);
+	if (IS_ERR(tmp_mangler))
+		return PTR_ERR(tmp_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = tmp_mangler;
+#endif
 
 	/* Register hooks */
 	ret = nf_register_hook(&ipt_ops[0]);
@@ -242,19 +256,57 @@ static int __init init(void)
  cleanup_hook0:
 	nf_unregister_hook(&ipt_ops[0]);
  cleanup_table:
-	ipt_unregister_table(&packet_mangler);
+	ipt_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
 
 	return ret;
 }
 
-static void __exit fini(void)
+static void mangle_fini(struct nf_hook_ops ipt_ops[])
 {
 	unsigned int i;
 
-	for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
+	for (i = 0; i < 5; i++)
 		nf_unregister_hook(&ipt_ops[i]);
 
-	ipt_unregister_table(&packet_mangler);
+	ipt_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
+}
+
+int init_iptable_mangle(void)
+{
+	return mangle_init(ipt_ops);	/* the file-scope ipt_ops[] table */
+}
+
+void fini_iptable_mangle(void)
+{
+	mangle_fini(ipt_ops);	/* the file-scope ipt_ops[] table */
+}
+
+static int __init init(void)	/* passed to module_init() below */
+{
+	int err;
+
+	err = init_iptable_mangle();	/* table + hooks, via mangle_init() */
+	if (err < 0)
+		return err;
+	/* NOTE(review): KSYM* macros presumably come from <linux/nfcalls.h> */
+	KSYMRESOLVE(init_iptable_mangle);
+	KSYMRESOLVE(fini_iptable_mangle);
+	KSYMMODRESOLVE(iptable_mangle);
+	return 0;
+}
+
+static void __exit fini(void)	/* undoes init(): symbols first, table last */
+{
+	KSYMMODUNRESOLVE(iptable_mangle);
+	KSYMUNRESOLVE(init_iptable_mangle);
+	KSYMUNRESOLVE(fini_iptable_mangle);
+	fini_iptable_mangle();	/* unregisters the hooks, then the table */
+}
 
 module_init(init);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/iptable_raw.c linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/iptable_raw.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/netfilter/iptable_raw.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/netfilter/iptable_raw.c	2007-08-28 17:35:32.000000000 +0400
@@ -118,12 +118,13 @@ static struct nf_hook_ops ipt_ops[] = {
 
 static int __init init(void)
 {
+	struct ipt_table *tmp;
 	int ret;
 
 	/* Register table */
-	ret = ipt_register_table(&packet_raw, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp = ipt_register_table(&packet_raw, &initial_table.repl);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
 
 	/* Register hooks */
 	ret = nf_register_hook(&ipt_ops[0]);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/proc.c linux-2.6.16.46-0.12-027test011/net/ipv4/proc.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/proc.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/proc.c	2007-08-28 17:35:32.000000000 +0400
@@ -60,6 +60,9 @@ static int fold_prot_inuse(struct proto 
  */
 static int sockstat_seq_show(struct seq_file *seq, void *v)
 {
+	if (!ve_is_super(get_exec_env()))
+		return 0;
+
 	socket_seq_show(seq);
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
 		   fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
@@ -258,11 +261,12 @@ static int snmp_seq_show(struct seq_file
 		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
 
 	seq_printf(seq, "\nIp: %d %d",
-			ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl);
+			ve_ipv4_devconf.forwarding ? 1 : 2,
+			sysctl_ip_default_ttl);
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) ip_statistics, 
+			   fold_field((void **) ve_ip_statistics,
 				      snmp4_ipstats_list[i].entry));
 
 	seq_puts(seq, "\nIcmp:");
@@ -272,7 +276,7 @@ static int snmp_seq_show(struct seq_file
 	seq_puts(seq, "\nIcmp:");
 	for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) icmp_statistics, 
+			   fold_field((void **) ve_icmp_statistics,
 				      snmp4_icmp_list[i].entry));
 
 	seq_puts(seq, "\nTcp:");
@@ -284,11 +288,11 @@ static int snmp_seq_show(struct seq_file
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   fold_field((void **) tcp_statistics, 
+				   fold_field((void **) ve_tcp_statistics,
 					      snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   fold_field((void **) tcp_statistics,
+				   fold_field((void **) ve_tcp_statistics,
 					      snmp4_tcp_list[i].entry));
 	}
 
@@ -299,7 +303,7 @@ static int snmp_seq_show(struct seq_file
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) udp_statistics, 
+			   fold_field((void **) ve_udp_statistics,
 				      snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -333,7 +337,7 @@ static int netstat_seq_show(struct seq_f
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) net_statistics, 
+			   fold_field((void **) ve_net_statistics,
 				      snmp4_net_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -357,20 +361,20 @@ int __init ip_misc_proc_init(void)
 {
 	int rc = 0;
 
-	if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops))
+	if (!proc_glob_fops_create("net/netstat", S_IRUGO, &netstat_seq_fops))
 		goto out_netstat;
 
-	if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops))
+	if (!proc_glob_fops_create("net/snmp", S_IRUGO, &snmp_seq_fops))
 		goto out_snmp;
 
-	if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops))
+	if (!proc_glob_fops_create("net/sockstat", S_IRUGO, &sockstat_seq_fops))
 		goto out_sockstat;
 out:
 	return rc;
 out_sockstat:
-	proc_net_remove("snmp");
+	remove_proc_glob_entry("net/snmp", NULL);
 out_snmp:
-	proc_net_remove("netstat");
+	remove_proc_glob_entry("net/netstat", NULL);
 out_netstat:
 	rc = -ENOMEM;
 	goto out;
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/raw.c linux-2.6.16.46-0.12-027test011/net/ipv4/raw.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/raw.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/raw.c	2007-08-28 17:35:32.000000000 +0400
@@ -114,7 +114,8 @@ struct sock *__raw_v4_lookup(struct sock
 		if (inet->num == num 					&&
 		    !(inet->daddr && inet->daddr != raddr) 		&&
 		    !(inet->rcv_saddr && inet->rcv_saddr != laddr)	&&
-		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) &&
+		    ve_accessible_strict(sk->owner_env, get_exec_env()))
 			goto found; /* gotcha */
 	}
 	sk = NULL;
@@ -753,8 +754,12 @@ static struct sock *raw_get_first(struct
 		struct hlist_node *node;
 
 		sk_for_each(sk, node, &raw_v4_htable[state->bucket])
-			if (sk->sk_family == PF_INET)
+			if (sk->sk_family == PF_INET) {
+				if (!ve_accessible(sk->owner_env,
+							get_exec_env()))
+					continue;
 				goto found;
+			}
 	}
 	sk = NULL;
 found:
@@ -768,8 +773,13 @@ static struct sock *raw_get_next(struct 
 	do {
 		sk = sk_next(sk);
 try_again:
-		;
-	} while (sk && sk->sk_family != PF_INET);
+		if (!sk)
+			break;
+		if (sk->sk_family != PF_INET)
+			continue;
+		if (ve_accessible(sk->owner_env, get_exec_env()))
+			break;
+	} while (1);
 
 	if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
 		sk = sk_head(&raw_v4_htable[state->bucket]);
@@ -886,13 +896,13 @@ static struct file_operations raw_seq_fo
 
 int __init raw_proc_init(void)
 {
-	if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops))
+	if (!proc_glob_fops_create("net/raw", S_IRUGO, &raw_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void __init raw_proc_exit(void)
 {
-	proc_net_remove("raw");
+	remove_proc_glob_entry("net/raw", NULL);
 }
 #endif /* CONFIG_PROC_FS */
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/route.c linux-2.6.16.46-0.12-027test011/net/ipv4/route.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/route.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/route.c	2007-08-28 17:35:32.000000000 +0400
@@ -114,6 +114,8 @@
 
 #define RT_GC_TIMEOUT (300*HZ)
 
+int ip_rt_src_check		= 1;
+
 static int ip_rt_min_delay		= 2 * HZ;
 static int ip_rt_max_delay		= 10 * HZ;
 static int ip_rt_max_size;
@@ -253,11 +255,28 @@ static unsigned int rt_hash_code(u32 dad
 		& rt_hash_mask);
 }
 
+/* Set owner_env of every cached route to get_ve0() (no-op if !CONFIG_VE). */
+void prepare_rt_cache(void)
+{
+#ifdef CONFIG_VE
+	struct rtable *r;
+	int i;
+
+	for (i = rt_hash_mask; i >= 0; i--) {
+		spin_lock_bh(rt_hash_lock_addr(i));
+		for (r = rt_hash_table[i].chain; r; r = r->u.rt_next)
+			r->fl.owner_env = get_ve0();
+		spin_unlock_bh(rt_hash_lock_addr(i));
+	}
+#endif
+}
+
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
 	int bucket;
 };
 
+static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r);
 static struct rtable *rt_cache_get_first(struct seq_file *seq)
 {
 	struct rtable *r = NULL;
@@ -270,6 +289,8 @@ static struct rtable *rt_cache_get_first
 			break;
 		rcu_read_unlock_bh();
 	}
+	if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()))
+		r = rt_cache_get_next(seq, r);
 	return r;
 }
 
@@ -277,6 +298,7 @@ static struct rtable *rt_cache_get_next(
 {
 	struct rt_cache_iter_state *st = rcu_dereference(seq->private);
 
+loop:
 	r = r->u.rt_next;
 	while (!r) {
 		rcu_read_unlock_bh();
@@ -285,6 +307,8 @@ static struct rtable *rt_cache_get_next(
 		rcu_read_lock_bh();
 		r = rt_hash_table[st->bucket].chain;
 	}
+	if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()))
+		goto loop;
 	return r;
 }
 
@@ -556,7 +580,8 @@ static inline int compare_keys(struct fl
 {
 	return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
 	       fl1->oif     == fl2->oif &&
-	       fl1->iif     == fl2->iif;
+	       fl1->iif     == fl2->iif &&
+	       ve_accessible_strict(fl1->owner_env, fl2->owner_env);
 }
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
@@ -670,26 +695,105 @@ static void rt_check_expire(unsigned lon
 	mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
 }
 
+typedef unsigned long rt_flush_gen_t;	/* route cache flush generation */
+
+#ifdef CONFIG_VE
+
+static rt_flush_gen_t rt_flush_gen;	/* advanced by reset_rt_flush_required() */
+
+/* Stamp env with the current generation; called under rt_flush_lock. */
+static void set_rt_flush_required(struct ve_struct *env)
+{
+	/*
+	 * If the global generation rt_flush_gen is equal to G, then
+	 * the pass considering entries labelled by G is yet to come.
+	 */
+	env->rt_flush_required = rt_flush_gen;
+}
+
+static spinlock_t rt_flush_lock;
+static rt_flush_gen_t reset_rt_flush_required(void)
+{
+	rt_flush_gen_t g;
+
+	spin_lock_bh(&rt_flush_lock);
+	g = rt_flush_gen++;	/* hand out the old generation, open a new one */
+	spin_unlock_bh(&rt_flush_lock);
+	return g;
+}
+
+static int check_rt_flush_required(struct ve_struct *env, rt_flush_gen_t gen)
+{
+	/* nonzero if env was stamped at generation >= gen; needs no lock */
+	return env->rt_flush_required >= gen;
+}
+
+#else /* !CONFIG_VE */
+
+static void set_rt_flush_required(struct ve_struct *env)
+{
+}
+
+static rt_flush_gen_t reset_rt_flush_required(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_VE */
+
 /* This can run from both BH and non-BH contexts, the latter
  * in the case of a forced flush event.
  */
 static void rt_run_flush(unsigned long dummy)
 {
 	int i;
-	struct rtable *rth, *next;
+	struct rtable * rth, * next;
+	struct rtable * tail;
+	rt_flush_gen_t gen;
 
 	rt_deadline = 0;
 
 	get_random_bytes(&rt_hash_rnd, 4);
 
+	gen = reset_rt_flush_required();
+
 	for (i = rt_hash_mask; i >= 0; i--) {
+#ifdef CONFIG_VE
+		struct rtable ** prev, * p;
+
+		spin_lock_bh(rt_hash_lock_addr(i));
+		rth = rt_hash_table[i].chain;
+
+		/* defer releasing the head of the list after spin_unlock */
+		for (tail = rth; tail; tail = tail->u.rt_next)
+			if (!check_rt_flush_required(tail->fl.owner_env, gen))
+				break;
+		if (rth != tail)
+			rt_hash_table[i].chain = tail;
+
+		/* call rt_free on entries after the tail requiring flush */
+		prev = &rt_hash_table[i].chain;
+		for (p = *prev; p; p = next) {
+			next = p->u.rt_next;
+			if (!check_rt_flush_required(p->fl.owner_env, gen)) {
+				prev = &p->u.rt_next;
+			} else {
+				*prev = next;
+				rt_free(p);
+			}
+		}
+
+#else
 		spin_lock_bh(rt_hash_lock_addr(i));
 		rth = rt_hash_table[i].chain;
 		if (rth)
 			rt_hash_table[i].chain = NULL;
+		tail = NULL;
+
+#endif
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
-		for (; rth; rth = next) {
+		for (; rth != tail; rth = next) {
 			next = rth->u.rt_next;
 			rt_free(rth);
 		}
@@ -728,6 +832,8 @@ void rt_cache_flush(int delay)
 			delay = tmo;
 	}
 
+	set_rt_flush_required(get_exec_env());
+
 	if (delay <= 0) {
 		spin_unlock_bh(&rt_flush_lock);
 		rt_run_flush(0);
@@ -743,9 +849,30 @@ void rt_cache_flush(int delay)
 
 static void rt_secret_rebuild(unsigned long dummy)
 {
+	int i;
+	struct rtable *rth, *next;
 	unsigned long now = jiffies;
 
-	rt_cache_flush(0);
+	spin_lock_bh(&rt_flush_lock);
+	del_timer(&rt_flush_timer);	/* all buckets are flushed right below */
+	spin_unlock_bh(&rt_flush_lock);
+
+	rt_deadline = 0;
+	get_random_bytes(&rt_hash_rnd, 4);
+
+	for (i = rt_hash_mask; i >= 0; i--) {	/* every bucket, no owner_env filter */
+		spin_lock_bh(rt_hash_lock_addr(i));
+		rth = rt_hash_table[i].chain;
+		if (rth)
+			rt_hash_table[i].chain = NULL;	/* detach the whole chain */
+		spin_unlock_bh(rt_hash_lock_addr(i));
+
+		for (; rth; rth = next) {	/* free detached entries outside the lock */
+			next = rth->u.rt_next;
+			rt_free(rth);
+		}
+	}
+
 	mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
 }
 
@@ -1118,7 +1245,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 	struct rtable *rth, **rthp;
 	u32  skeys[2] = { saddr, 0 };
 	int  ikeys[2] = { dev->ifindex, 0 };
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	tos &= IPTOS_RT_MASK;
 
 	if (!in_dev)
@@ -1154,6 +1283,10 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 				    rth->fl.fl4_src != skeys[i] ||
 				    rth->fl.fl4_tos != tos ||
 				    rth->fl.oif != ikeys[k] ||
+#ifdef CONFIG_VE
+				    !ve_accessible_strict(rth->fl.owner_env,
+					    		  ve) ||
+#endif
 				    rth->fl.iif != 0) {
 					rthp = &rth->u.rt_next;
 					continue;
@@ -1192,6 +1325,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
 				rt->u.dst.neighbour	= NULL;
 				rt->u.dst.hh		= NULL;
 				rt->u.dst.xfrm		= NULL;
+#ifdef CONFIG_VE
+				rt->fl.owner_env = ve;
+#endif
 
 				rt->rt_flags		|= RTCF_REDIRECTED;
 
@@ -1631,6 +1767,9 @@ static int ip_route_input_mc(struct sk_b
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -1776,6 +1915,9 @@ static inline int __mkroute_input(struct
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 	rth->rt_gateway	= daddr;
@@ -2021,6 +2163,9 @@ local_input:
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= skb->nfmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -2100,6 +2245,9 @@ int ip_route_input(struct sk_buff *skb, 
 #ifdef CONFIG_IP_ROUTE_FWMARK
 		    rth->fl.fl4_fwmark == skb->nfmark &&
 #endif
+#ifdef CONFIG_VE
+		    rth->fl.owner_env == get_exec_env() &&
+#endif
 		    rth->fl.fl4_tos == tos) {
 			rth->u.dst.lastuse = jiffies;
 			dst_hold(&rth->u.dst);
@@ -2226,6 +2374,9 @@ static inline int __mkroute_output(struc
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
 #endif
+#ifdef CONFIG_VE
+	rth->fl.owner_env = get_exec_env();
+#endif
 	rth->rt_dst	= fl->fl4_dst;
 	rth->rt_src	= fl->fl4_src;
 	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
@@ -2399,10 +2550,13 @@ static int ip_route_output_slow(struct r
 		    ZERONET(oldflp->fl4_src))
 			goto out;
 
-		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-		dev_out = ip_dev_find(oldflp->fl4_src);
-		if (dev_out == NULL)
-			goto out;
+		if (ip_rt_src_check) {
+			/* It is equivalent to
+			   inet_addr_type(saddr) == RTN_LOCAL */
+			dev_out = ip_dev_find(oldflp->fl4_src);
+			if (dev_out == NULL)
+				goto out;
+		}
 
 		/* I removed check for oif == dev_out->oif here.
 		   It was wrong for two reasons:
@@ -2429,6 +2583,12 @@ static int ip_route_output_slow(struct r
 			   Luckily, this hack is good workaround.
 			 */
 
+			if (dev_out == NULL) {
+				dev_out = ip_dev_find(oldflp->fl4_src);
+				if (dev_out == NULL)
+					goto out;
+			}
+
 			fl.oif = dev_out->ifindex;
 			goto make_route;
 		}
@@ -2575,6 +2735,7 @@ int __ip_route_output_key(struct rtable 
 #ifdef CONFIG_IP_ROUTE_FWMARK
 		    rth->fl.fl4_fwmark == flp->fl4_fwmark &&
 #endif
+		    ve_accessible_strict(rth->fl.owner_env, get_exec_env()) &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK))) {
 
@@ -2705,7 +2866,7 @@ static int rt_fill_info(struct sk_buff *
 		u32 dst = rt->rt_dst;
 
 		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
-		    ipv4_devconf.mc_forwarding) {
+		    ve_ipv4_devconf.mc_forwarding) {
 			int err = ipmr_get_route(skb, r, nowait);
 			if (err <= 0) {
 				if (!nowait) {
@@ -2856,22 +3017,22 @@ void ip_rt_multicast_event(struct in_dev
 }
 
 #ifdef CONFIG_SYSCTL
-static int flush_delay;
+int ipv4_flush_delay;
 
-static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
 					struct file *filp, void __user *buffer,
 					size_t *lenp, loff_t *ppos)
 {
 	if (write) {
 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-		rt_cache_flush(flush_delay);
+		rt_cache_flush(ipv4_flush_delay);
 		return 0;
 	} 
 
 	return -EINVAL;
 }
 
-static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
+int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 						int __user *name,
 						int nlen,
 						void __user *oldval,
@@ -2893,7 +3054,7 @@ ctl_table ipv4_route_table[] = {
         {
 		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
 		.procname	= "flush",
-		.data		= &flush_delay,
+		.data		= &ipv4_flush_delay,
 		.maxlen		= sizeof(int),
 		.mode		= 0200,
 		.proc_handler	= &ipv4_sysctl_rtcache_flush,
@@ -3187,15 +3348,18 @@ int __init ip_rt_init(void)
 #ifdef CONFIG_PROC_FS
 	{
 	struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
-	if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
-	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, 
-			    		     proc_net_stat))) {
+
+	if (!proc_glob_fops_create("net/rt_cache",
+				S_IRUGO, &rt_cache_seq_fops))
+		return -ENOMEM;
+
+	if (!(rtstat_pde = create_proc_glob_entry("net/stat/rt_cache",
+				S_IRUGO, NULL)))
 		return -ENOMEM;
-	}
 	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
 	}
 #ifdef CONFIG_NET_CLS_ROUTE
-	create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
+	create_proc_read_entry("net/rt_acct", 0, NULL, ip_rt_acct_read, NULL);
 #endif
 #endif
 #ifdef CONFIG_XFRM
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/sysctl_net_ipv4.c linux-2.6.16.46-0.12-027test011/net/ipv4/sysctl_net_ipv4.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/sysctl_net_ipv4.c	2007-08-24 19:28:13.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/sysctl_net_ipv4.c	2007-08-28 17:35:32.000000000 +0400
@@ -33,22 +33,21 @@ struct ipv4_config ipv4_config;
 
 #ifdef CONFIG_SYSCTL
 
-static
 int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
 			void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int val = ipv4_devconf.forwarding;
+	int val = ve_ipv4_devconf.forwarding;
 	int ret;
 
 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
-	if (write && ipv4_devconf.forwarding != val)
+	if (write && ve_ipv4_devconf.forwarding != val)
 		inet_forward_change();
 
 	return ret;
 }
 
-static int ipv4_sysctl_forward_strategy(ctl_table *table,
+int ipv4_sysctl_forward_strategy(ctl_table *table,
 			 int __user *name, int nlen,
 			 void __user *oldval, size_t __user *oldlenp,
 			 void __user *newval, size_t newlen, 
@@ -625,6 +624,22 @@ ctl_table ipv4_table[] = {
 		.extra1		= &zero
 	},
 	{
+		.ctl_name       = NET_TCP_MAX_TW_KMEM_FRACTION,
+		.procname       = "tcp_max_tw_kmem_fraction",
+		.data           = &sysctl_tcp_max_tw_kmem_fraction,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = &proc_dointvec
+	},
+	{
+		.ctl_name       = NET_TCP_MAX_TW_BUCKETS_UB,
+		.procname       = "tcp_max_tw_buckets_ub",
+		.data           = &sysctl_tcp_max_tw_buckets_ub,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = &proc_dointvec
+	},
+	{
 		.ctl_name	= NET_TCP_NO_METRICS_SAVE,
 		.procname	= "tcp_no_metrics_save",
 		.data		= &sysctl_tcp_nometrics_save,
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/tcp.c linux-2.6.16.46-0.12-027test011/net/ipv4/tcp.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/tcp.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/tcp.c	2007-08-28 17:35:32.000000000 +0400
@@ -248,6 +248,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/kmem_cache.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
@@ -265,6 +266,10 @@
 #include <net/ip.h>
 #include <net/netdma.h>
 
+#include <ub/ub_orphan.h>
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
+
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
@@ -322,6 +327,7 @@ unsigned int tcp_poll(struct file *file,
 	unsigned int mask;
 	struct sock *sk = sock->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
+	int check_send_space;
 
 	poll_wait(file, sk->sk_sleep, wait);
 	if (sk->sk_state == TCP_LISTEN)
@@ -336,6 +342,21 @@ unsigned int tcp_poll(struct file *file,
 	if (sk->sk_err)
 		mask = POLLERR;
 
+	check_send_space = 1;
+#ifdef CONFIG_USER_RESOURCE
+	if (!(sk->sk_shutdown & SEND_SHUTDOWN) && sock_has_ubc(sk)) {
+		unsigned long size;
+		size = MAX_TCP_HEADER + tp->mss_cache;
+		if (size > SOCK_MIN_UBCSPACE)
+			size = SOCK_MIN_UBCSPACE;
+		size = skb_charge_size(size);
+		if (ub_sock_makewres_tcp(sk, size)) {
+			check_send_space = 0;
+			ub_sock_sndqueueadd_tcp(sk, size);
+		}
+	}
+#endif
+
 	/*
 	 * POLLHUP is certainly not done right. But poll() doesn't
 	 * have a notion of HUP in just one direction, and for a
@@ -379,7 +400,7 @@ unsigned int tcp_poll(struct file *file,
 		     sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data))
 			mask |= POLLIN | POLLRDNORM;
 
-		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+		if (check_send_space && !(sk->sk_shutdown & SEND_SHUTDOWN)) {
 			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
 				mask |= POLLOUT | POLLWRNORM;
 			} else {  /* send SIGIO later */
@@ -529,16 +550,23 @@ static ssize_t do_tcp_sendpages(struct s
 		int copy, i, can_coalesce;
 		int offset = poffset % PAGE_SIZE;
 		int size = min_t(size_t, psize, PAGE_SIZE - offset);
+		unsigned long chargesize = 0;
 
 		if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
 new_segment:
+			chargesize = 0;
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
+			chargesize = skb_charge_size(MAX_TCP_HEADER +
+					tp->mss_cache);
+			if (ub_sock_getwres_tcp(sk, chargesize) < 0)
+				goto wait_for_ubspace;
 			skb = sk_stream_alloc_pskb(sk, 0, 0,
 						   sk->sk_allocation);
 			if (!skb)
 				goto wait_for_memory;
+			ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
 
 			skb_entail(sk, tp, skb);
 			copy = size_goal;
@@ -594,10 +622,15 @@ new_segment:
 wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
+		ub_sock_retwres_tcp(sk, chargesize,
+			skb_charge_size(MAX_TCP_HEADER+tp->mss_cache));
+		chargesize = 0;
+wait_for_ubspace:
 		if (copied)
 			tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
-		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+		err = __sk_stream_wait_memory(sk, &timeo, chargesize);
+		if (err != 0)
 			goto do_error;
 
 		mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
@@ -634,9 +667,6 @@ ssize_t tcp_sendpage(struct socket *sock
 	return res;
 }
 
-#define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
-#define TCP_OFF(sk)	(sk->sk_sndmsg_off)
-
 static inline int select_size(struct sock *sk, struct tcp_sock *tp)
 {
 	int tmp = tp->mss_cache;
@@ -696,6 +726,7 @@ int tcp_sendmsg(struct kiocb *iocb, stru
 	while (--iovlen >= 0) {
 		int seglen = iov->iov_len;
 		unsigned char __user *from = iov->iov_base;
+		unsigned long chargesize = 0;
 
 		iov++;
 
@@ -706,18 +737,26 @@ int tcp_sendmsg(struct kiocb *iocb, stru
 
 			if (!sk->sk_send_head ||
 			    (copy = size_goal - skb->len) <= 0) {
+				unsigned long size;
 
 new_segment:
 				/* Allocate new segment. If the interface is SG,
 				 * allocate skb fitting to single page.
 				 */
+				chargesize = 0;
 				if (!sk_stream_memory_free(sk))
 					goto wait_for_sndbuf;
-
-				skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
-							   0, sk->sk_allocation);
+				size = select_size(sk, tp);
+				chargesize = skb_charge_size(MAX_TCP_HEADER +
+						size);
+				if (ub_sock_getwres_tcp(sk, chargesize) < 0)
+					goto wait_for_ubspace;
+				skb = sk_stream_alloc_pskb(sk, size, 0,
+						sk->sk_allocation);
 				if (!skb)
 					goto wait_for_memory;
+				ub_skb_set_charge(skb, sk, chargesize,
+						UB_TCPSNDBUF);
 
 				/*
 				 * Check whether we can use HW checksum.
@@ -763,6 +802,7 @@ new_segment:
 				} else if (page) {
 					if (off == PAGE_SIZE) {
 						put_page(page);
+						ub_sock_tcp_detachpage(sk);
 						TCP_PAGE(sk) = page = NULL;
 						off = 0;
 					}
@@ -776,6 +816,9 @@ new_segment:
 					goto wait_for_memory;
 
 				if (!page) {
+					chargesize = PAGE_SIZE;
+					if (ub_sock_tcp_chargepage(sk) < 0)
+						goto wait_for_ubspace;
 					/* Allocate new cache page. */
 					if (!(page = sk_stream_alloc_page(sk)))
 						goto wait_for_memory;
@@ -807,7 +850,8 @@ new_segment:
 					} else if (off + copy < PAGE_SIZE) {
 						get_page(page);
 						TCP_PAGE(sk) = page;
-					}
+					} else
+						ub_sock_tcp_detachpage(sk);
 				}
 
 				TCP_OFF(sk) = off + copy;
@@ -838,11 +882,17 @@ new_segment:
 wait_for_sndbuf:
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
+			ub_sock_retwres_tcp(sk, chargesize,
+				skb_charge_size(MAX_TCP_HEADER + tp->mss_cache));
+			chargesize = 0;
+wait_for_ubspace:
+
 			if (copied)
 				tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
-			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
-				goto do_error;
+			err = __sk_stream_wait_memory(sk, &timeo, chargesize);
+			if (err != 0)
+				goto do_error;
 
 			mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
 			size_goal = tp->xmit_size_goal;
@@ -939,7 +989,18 @@ void tcp_cleanup_rbuf(struct sock *sk, i
 #if TCP_DEBUG
 	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 
-	BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
+	if (!(skb==NULL || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq))) {
+		printk("KERNEL: assertion: skb==NULL || "
+				"before(tp->copied_seq, skb->end_seq)\n");
+		printk("VE%u pid %d comm %.16s\n",
+				(get_exec_env() ? VEID(get_exec_env()) : 0),
+				current->pid, current->comm);
+		printk("copied=%d, copied_seq=%d, rcv_nxt=%d\n", copied,
+				tp->copied_seq, tp->rcv_nxt);
+		printk("skb->len=%d, skb->seq=%d, skb->end_seq=%d\n",
+				skb->len, TCP_SKB_CB(skb)->seq,
+				TCP_SKB_CB(skb)->end_seq);
+	}
 #endif
 
 	if (inet_csk_ack_scheduled(sk)) {
@@ -1175,7 +1236,22 @@ int tcp_recvmsg(struct kiocb *iocb, stru
 				goto found_ok_skb;
 			if (skb->h.th->fin)
 				goto found_fin_ok;
-			BUG_TRAP(flags & MSG_PEEK);
+			if (!(flags & MSG_PEEK)) {
+				printk("KERNEL: assertion: flags&MSG_PEEK\n");
+				printk("VE%u pid %d comm %.16s\n",
+						(get_exec_env() ?
+						 VEID(get_exec_env()) : 0),
+						current->pid, current->comm);
+				printk("flags=0x%x, len=%d, copied_seq=%d, "
+						"rcv_nxt=%d\n", flags, (int)len,
+						tp->copied_seq, tp->rcv_nxt);
+				printk("skb->len=%d, *seq=%d, skb->seq=%d, "
+						"skb->end_seq=%d, offset=%d\n",
+						skb->len, *seq,
+						TCP_SKB_CB(skb)->seq,
+						TCP_SKB_CB(skb)->end_seq,
+						offset);
+			}
 			skb = skb->next;
 		} while (skb != (struct sk_buff *)&sk->sk_receive_queue);
 
@@ -1238,8 +1314,18 @@ int tcp_recvmsg(struct kiocb *iocb, stru
 
 			tp->ucopy.len = len;
 
-			BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
-				 (flags & (MSG_PEEK | MSG_TRUNC)));
+			if (!(tp->copied_seq == tp->rcv_nxt ||
+						(flags&(MSG_PEEK|MSG_TRUNC)))) {
+				printk("KERNEL: assertion: tp->copied_seq == "
+						"tp->rcv_nxt || ...\n");
+				printk("VE%u pid %d comm %.16s\n",
+						(get_exec_env() ?
+						 VEID(get_exec_env()) : 0),
+						current->pid, current->comm);
+				printk("flags=0x%x, len=%d, copied_seq=%d, "
+						"rcv_nxt=%d\n", flags, (int)len,
+						tp->copied_seq, tp->rcv_nxt);
+			}
 
 			/* Ugly... If prequeue is not empty, we have to
 			 * process it before releasing socket, otherwise
@@ -1654,7 +1743,7 @@ adjudge_to_death:
 			if (tmo > TCP_TIMEWAIT_LEN) {
 				inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
 			} else {
-				atomic_inc(sk->sk_prot->orphan_count);
+				ub_inc_orphan_count(sk);
 				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
 				goto out;
 			}
@@ -1662,9 +1751,7 @@ adjudge_to_death:
 	}
 	if (sk->sk_state != TCP_CLOSE) {
 		sk_stream_mem_reclaim(sk);
-		if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans ||
-		    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-		     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
+		if (ub_too_many_orphans(sk, ub_get_orphan_count(sk))) {
 			if (net_ratelimit())
 				printk(KERN_INFO "TCP: too many of orphaned "
 				       "sockets\n");
@@ -1673,7 +1760,7 @@ adjudge_to_death:
 			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
 		}
 	}
-	atomic_inc(sk->sk_prot->orphan_count);
+	ub_inc_orphan_count(sk);
 
 	if (sk->sk_state == TCP_CLOSE)
 		inet_csk_destroy_sock(sk);
@@ -1744,6 +1831,7 @@ int tcp_disconnect(struct sock *sk, int 
 	tp->snd_ssthresh = 0x7fffffff;
 	tp->snd_cwnd_cnt = 0;
 	tp->bytes_acked = 0;
+	tp->advmss = 65535;
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
@@ -2172,6 +2260,7 @@ out:
 }
 
 extern void __skb_cb_too_small_for_tcp(int, int);
+extern unsigned int nr_free_lowpages(void);
 extern struct tcp_congestion_ops tcp_reno;
 
 static __initdata unsigned long thash_entries;
@@ -2188,6 +2277,7 @@ void __init tcp_init(void)
 {
 	struct sk_buff *skb = NULL;
 	int order, i;
+	unsigned long goal;
 
 	if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb))
 		__skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
@@ -2196,7 +2286,7 @@ void __init tcp_init(void)
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
-				  SLAB_HWCACHE_ALIGN, NULL, NULL);
+				  SLAB_HWCACHE_ALIGN | SLAB_UBC, NULL, NULL);
 	if (!tcp_hashinfo.bind_bucket_cachep)
 		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
 
@@ -2257,10 +2347,19 @@ void __init tcp_init(void)
 		sysctl_max_syn_backlog = 128;
 	}
 
+	goal = nr_free_lowpages() / 6;
+	while (order >= 3 && (1536<<order) > goal)
+		order--;
+
 	sysctl_tcp_mem[0] =  768 << order;
 	sysctl_tcp_mem[1] = 1024 << order;
 	sysctl_tcp_mem[2] = 1536 << order;
 
+	if (sysctl_tcp_mem[2] - sysctl_tcp_mem[1] > 4096)
+		sysctl_tcp_mem[1] = sysctl_tcp_mem[2] - 4096;
+	if (sysctl_tcp_mem[1] - sysctl_tcp_mem[0] > 4096)
+		sysctl_tcp_mem[0] = sysctl_tcp_mem[1] - 4096;
+
 	if (order < 3) {
 		sysctl_tcp_wmem[2] = 64 * 1024;
 		sysctl_tcp_rmem[0] = PAGE_SIZE;
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/tcp_input.c linux-2.6.16.46-0.12-027test011/net/ipv4/tcp_input.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/tcp_input.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/tcp_input.c	2007-08-28 17:35:30.000000000 +0400
@@ -73,6 +73,8 @@
 #include <asm/unaligned.h>
 #include <net/netdma.h>
 
+#include <ub/ub_tcp.h>
+
 int sysctl_tcp_timestamps = 1;
 int sysctl_tcp_window_scaling = 1;
 int sysctl_tcp_sack = 1;
@@ -253,7 +255,7 @@ static void tcp_grow_window(struct sock 
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
-	    !tcp_memory_pressure) {
+	    ub_tcp_rmem_allows_expand(sk)) {
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
@@ -322,6 +324,8 @@ static void tcp_init_buffer_space(struct
 
 	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
+
+	ub_tcp_update_maxadvmss(sk);
 }
 
 /* 5. Recalculate window clamp after socket hit its memory bounds. */
@@ -333,7 +337,7 @@ static void tcp_clamp_window(struct sock
 
 	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-	    !tcp_memory_pressure &&
+	    !ub_tcp_memory_pressure(sk) &&
 	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 				    sysctl_tcp_rmem[2]);
@@ -3120,7 +3124,7 @@ queue_and_out:
 			     !sk_stream_rmem_schedule(sk, skb))) {
 				if (tcp_prune_queue(sk) < 0 ||
 				    !sk_stream_rmem_schedule(sk, skb))
-					goto drop;
+					goto drop_part;
 			}
 			sk_stream_set_owner_r(skb, sk);
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -3164,6 +3168,12 @@ out_of_window:
 drop:
 		__kfree_skb(skb);
 		return;
+
+drop_part:
+		if (after(tp->copied_seq, tp->rcv_nxt))
+			tp->rcv_nxt = tp->copied_seq;
+		__kfree_skb(skb);
+		return;
 	}
 
 	/* Out of window. F.e. zero window probe. */
@@ -3335,6 +3345,10 @@ tcp_collapse(struct sock *sk, struct sk_
 		nskb = alloc_skb(copy+header, GFP_ATOMIC);
 		if (!nskb)
 			return;
+		if (ub_tcprcvbuf_charge_forced(skb->sk, nskb) < 0) {
+			kfree_skb(nskb);
+			return;
+		}
 		skb_reserve(nskb, header);
 		memcpy(nskb->head, skb->head, header);
 		nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
@@ -3431,7 +3445,7 @@ static int tcp_prune_queue(struct sock *
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		tcp_clamp_window(sk, tp);
-	else if (tcp_memory_pressure)
+	else if (ub_tcp_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
 	tcp_collapse_ofo_queue(sk);
@@ -3507,7 +3521,7 @@ static int tcp_should_expand_sndbuf(stru
 		return 0;
 
 	/* If we are under global TCP memory pressure, do not expand.  */
-	if (tcp_memory_pressure)
+	if (ub_tcp_memory_pressure(sk))
 		return 0;
 
 	/* If we are under soft global TCP memory pressure, do not expand.  */
@@ -3954,6 +3968,10 @@ int tcp_rcv_established(struct sock *sk,
 
 				if ((int)skb->truesize > sk->sk_forward_alloc)
 					goto step5;
+				/* This is OK not to try to free memory here.
+				 * Do this below on slow path. Den */
+				if (ub_tcprcvbuf_charge(sk, skb) < 0)
+					goto step5;
 
 				NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/tcp_ipv4.c linux-2.6.16.46-0.12-027test011/net/ipv4/tcp_ipv4.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/tcp_ipv4.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/tcp_ipv4.c	2007-08-28 17:35:33.000000000 +0400
@@ -73,6 +73,8 @@
 #include <net/xfrm.h>
 #include <net/netdma.h>
 
+#include <ub/ub_tcp.h>
+
 #include <linux/inet.h>
 #include <linux/ipv6.h>
 #include <linux/stddef.h>
@@ -622,7 +624,8 @@ static void tcp_v4_timewait_ack(struct s
 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
-			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
+		tcptw->tw_rcv_wnd >> (tw->tw_rcv_wscale& TW_WSCALE_MASK),
+		tcptw->tw_ts_recent);
 
 	inet_twsk_put(tw);
 }
@@ -724,6 +727,7 @@ struct request_sock_ops tcp_request_sock
 	.destructor	=	tcp_v4_reqsk_destructor,
 	.send_reset	=	tcp_v4_send_reset,
 };
+EXPORT_SYMBOL_GPL(tcp_request_sock_ops);
 
 static struct timewait_sock_ops tcp_timewait_sock_ops = {
 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
@@ -998,12 +1002,15 @@ static int tcp_v4_checksum_init(struct s
  */
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
+	struct user_beancounter *ub;
+
+	ub = set_exec_ub(sock_bc(sk)->ub);
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		TCP_CHECK_TIMER(sk);
 		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
 			goto reset;
 		TCP_CHECK_TIMER(sk);
-		return 0;
+		goto restore_context;
 	}
 
 	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
@@ -1017,7 +1024,7 @@ int tcp_v4_do_rcv(struct sock *sk, struc
 		if (nsk != sk) {
 			if (tcp_child_process(sk, nsk, skb))
 				goto reset;
-			return 0;
+			goto restore_context;
 		}
 	}
 
@@ -1025,6 +1032,9 @@ int tcp_v4_do_rcv(struct sock *sk, struc
 	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
 		goto reset;
 	TCP_CHECK_TIMER(sk);
+
+restore_context:
+	(void)set_exec_ub(ub);
 	return 0;
 
 reset:
@@ -1036,7 +1046,7 @@ discard:
 	 * might be destroyed here. This current version compiles correctly,
 	 * but you have been warned.
 	 */
-	return 0;
+	goto restore_context;
 
 csum_err:
 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -1287,6 +1297,8 @@ static int tcp_v4_init_sock(struct sock 
 	tp->snd_cwnd_clamp = ~0;
 	tp->mss_cache = 536;
 
+	tp->advmss = 65535; /* max value */
+
 	tp->reordering = sysctl_tcp_reordering;
 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
 
@@ -1336,6 +1348,8 @@ int tcp_v4_destroy_sock(struct sock *sk)
 	 * If sendmsg cached page exists, toss it.
 	 */
 	if (sk->sk_sndmsg_page) {
+		/* queue is empty, uncharge */
+		ub_sock_tcp_detachpage(sk);
 		__free_page(sk->sk_sndmsg_page);
 		sk->sk_sndmsg_page = NULL;
 	}
@@ -1350,16 +1364,34 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
 #ifdef CONFIG_PROC_FS
 /* Proc filesystem TCP sock list dumping. */
 
-static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
+static inline struct inet_timewait_sock *tw_head(struct hlist_head *head,
+		envid_t veid)
 {
-	return hlist_empty(head) ? NULL :
-		list_entry(head->first, struct inet_timewait_sock, tw_node);
+	struct inet_timewait_sock *tw;
+	struct hlist_node *node;
+
+	/*
+	 * Hand back the first entry of the chain that ve_accessible_veid()
+	 * accepts for veid; an empty chain just falls through the loop.
+	 */
+	hlist_for_each_entry(tw, node, head, tw_node)
+		if (ve_accessible_veid(tw->tw_owner_env, veid))
+			return tw;
+	return NULL;
 }
 
-static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
+static inline struct inet_timewait_sock *
+	tw_next(struct inet_timewait_sock *tw, envid_t veid)
 {
-	return tw->tw_node.next ?
-		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
+	struct hlist_node *node;
+
+	/* Step along the chain until ve_accessible_veid() accepts an entry. */
+	for (node = tw->tw_node.next; node != NULL; node = node->next) {
+		tw = hlist_entry(node, typeof(*tw), tw_node);
+		if (ve_accessible_veid(tw->tw_owner_env, veid))
+			return tw;
+	}
+	return NULL;
 }
 
 static void *listening_get_next(struct seq_file *seq, void *cur, int noopenreq)
@@ -1368,7 +1400,9 @@ static void *listening_get_next(struct s
 	struct hlist_node *node;
 	struct sock *sk = cur;
 	struct tcp_iter_state* st = seq->private;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	if (!sk) {
 		st->bucket = 0;
 		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
@@ -1412,6 +1446,8 @@ get_req:
 	}
 get_sk:
 	sk_for_each_from(sk, node) {
+		if (!ve_accessible(sk->owner_env, ve))
+			continue;
 		if (sk->sk_family == st->family) {
 			cur = sk;
 			goto out;
@@ -1454,7 +1490,9 @@ static void *established_get_first(struc
 {
 	struct tcp_iter_state* st = seq->private;
 	void *rc = NULL;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
 		struct sock *sk;
 		struct hlist_node *node;
@@ -1465,6 +1503,8 @@ static void *established_get_first(struc
 
 		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
+			if (!ve_accessible(sk->owner_env, ve))
+				continue;
 			if (sk->sk_family != st->family) {
 				continue;
 			}
@@ -1474,6 +1514,8 @@ static void *established_get_first(struc
 		st->state = TCP_SEQ_STATE_TIME_WAIT;
 		inet_twsk_for_each(tw, node,
 				   &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
+			if (!ve_accessible_veid(tw->tw_owner_env, VEID(ve)))
+				continue;
 			if (tw->tw_family != st->family) {
 				continue;
 			}
@@ -1493,16 +1535,17 @@ static void *established_get_next(struct
 	struct inet_timewait_sock *tw;
 	struct hlist_node *node;
 	struct tcp_iter_state* st = seq->private;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	++st->num;
 
 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
 		tw = cur;
-		tw = tw_next(tw);
+		tw = tw_next(tw, VEID(ve));
 get_tw:
-		while (tw && tw->tw_family != st->family) {
-			tw = tw_next(tw);
-		}
+		while (tw && tw->tw_family != st->family)
+			tw = tw_next(tw, VEID(ve));
 		if (tw) {
 			cur = tw;
 			goto out;
@@ -1524,12 +1567,15 @@ get_tw:
 		sk = sk_next(sk);
 
 	sk_for_each_from(sk, node) {
+		if (!ve_accessible(sk->owner_env, ve))
+			continue;
 		if (sk->sk_family == st->family)
 			goto found;
 	}
 
 	st->state = TCP_SEQ_STATE_TIME_WAIT;
-	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
+	tw = tw_head(&tcp_hashinfo.ehash[st->bucket +
+			tcp_hashinfo.ehash_size].chain, VEID(ve));
 	goto get_tw;
 found:
 	cur = sk;
@@ -1701,7 +1747,7 @@ int tcp_proc_register(struct tcp_seq_afi
 	afinfo->seq_fops->llseek	= seq_lseek;
 	afinfo->seq_fops->release	= seq_release_private;
 	
-	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
+	p = proc_glob_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
 	if (p)
 		p->data = afinfo;
 	else
@@ -1713,7 +1759,8 @@ void tcp_proc_unregister(struct tcp_seq_
 {
 	if (!afinfo)
 		return;
-	proc_net_remove(afinfo->name);
+
+	remove_proc_glob_entry(afinfo->name, NULL);
 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 
 }
 
@@ -1843,7 +1890,7 @@ out:
 static struct file_operations tcp4_seq_fops;
 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "tcp",
+	.name		= "net/tcp",
 	.family		= AF_INET,
 	.seq_start	= tcp_seq_start, 
 	.seq_show	= tcp4_seq_show,
@@ -1928,6 +1975,106 @@ void __init tcp_v4_init(struct net_proto
 	tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
 }
 
+#ifdef CONFIG_VE
+/*
+ * Shut down one socket on behalf of tcp_v4_kill_ve_sockets().
+ *
+ * Only sockets flagged SOCK_DEAD are processed; any other socket is
+ * reported (the message is printed once, at KERN_DEBUG) and left alone.
+ * Otherwise an active reset is sent and the socket is moved on according
+ * to its current sk_state.  The caller in this file invokes this between
+ * bh_lock_sock() and bh_unlock_sock() with bottom halves disabled.
+ */
+static void tcp_kill_ve_onesk(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Check the assumed state of the socket. */
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		static int printed;
+		/* the switch below also jumps here for unexpected states */
+invalid:
+		if (!printed)
+			printk(KERN_DEBUG "Killing sk: dead %d, state %d, "
+				"wrseq %u unseq %u, wrqu %d.\n",
+				sock_flag(sk, SOCK_DEAD), sk->sk_state,
+				tp->write_seq, tp->snd_una,
+				!skb_queue_empty(&sk->sk_write_queue));
+		printed = 1;
+		return;
+	}
+
+	tcp_send_active_reset(sk, GFP_ATOMIC);
+	switch (sk->sk_state) {
+		case TCP_FIN_WAIT1:
+		case TCP_CLOSING:
+			/* In these 2 states the peer may want us to retransmit
+			 * some data and/or FIN.  Entering "resetting mode"
+			 * instead.
+			 */
+			tcp_time_wait(sk, TCP_CLOSE, 0);
+			break;
+		case TCP_FIN_WAIT2:
+			/* For some reason the socket may stay in this state
+			 * without turning into a TW bucket.  Fix it.
+			 */
+			tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
+			break;
+		case TCP_LAST_ACK:
+			/* Just jump into CLOSED state. */
+			tcp_done(sk);
+			break;
+		default:
+			/* The socket must be already close()d. */
+			goto invalid;
+	}
+}
+
+/*
+ * Walk every bucket of tcp_hashinfo.ehash and hand each socket that
+ * ve_accessible_strict() matches against envid to tcp_kill_ve_onesk().
+ *
+ * The bucket lock is dropped before the socket is locked, so the chain
+ * is rescanned from its head ("more_work") after every socket handled.
+ * NOTE(review): a matching socket that tcp_kill_ve_onesk() declines to
+ * touch appears to stay hashed and would be found again by the rescan;
+ * confirm that such sockets cannot exist when this is called.
+ */
+void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
+{
+	struct inet_ehash_bucket *head;
+	int i;
+
+	/* alive */
+	local_bh_disable();
+	head = tcp_hashinfo.ehash;
+	for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
+		struct sock *sk;
+		struct hlist_node *node;
+more_work:
+		write_lock(&head[i].lock);
+		sk_for_each(sk, node, &head[i].chain) {
+			if (ve_accessible_strict(sk->owner_env, envid)) {
+				sock_hold(sk);
+				write_unlock(&head[i].lock);
+
+				bh_lock_sock(sk);
+				/* sk might have disappeared from the hash before
+				 * we got the lock */
+				if (sk->sk_state != TCP_CLOSE)
+					tcp_kill_ve_onesk(sk);
+				bh_unlock_sock(sk);
+				sock_put(sk);
+				goto more_work;
+			}
+		}
+		write_unlock(&head[i].lock);
+	}
+	local_bh_enable();
+}
+EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
+#endif
+
 EXPORT_SYMBOL(ipv4_specific);
 EXPORT_SYMBOL(tcp_hashinfo);
 EXPORT_SYMBOL(tcp_prot);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/tcp_minisocks.c linux-2.6.16.46-0.12-027test011/net/ipv4/tcp_minisocks.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/tcp_minisocks.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/tcp_minisocks.c	2007-08-28 17:35:32.000000000 +0400
@@ -29,6 +29,9 @@
 #include <net/inet_common.h>
 #include <net/xfrm.h>
 
+#include <ub/ub_net.h>
+#include <ub/ub_orphan.h>
+
 #ifdef CONFIG_SYSCTL
 #define SYNC_INIT 0 /* let the user enable it */
 #else
@@ -37,6 +40,11 @@
 
 int sysctl_tcp_syncookies = SYNC_INIT; 
 int sysctl_tcp_abort_on_overflow;
+int sysctl_tcp_max_tw_kmem_fraction = 384;
+int sysctl_tcp_max_tw_buckets_ub = 16536;
+
+EXPORT_SYMBOL(sysctl_tcp_max_tw_kmem_fraction);
+EXPORT_SYMBOL(sysctl_tcp_max_tw_buckets_ub);
 
 struct inet_timewait_death_row tcp_death_row = {
 	.sysctl_max_tw_buckets = NR_FILE * 2,
@@ -53,6 +61,7 @@ struct inet_timewait_death_row tcp_death
 	.twcal_hand	= -1,
 	.twcal_timer	= TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
 					    (unsigned long)&tcp_death_row),
+	.ub_managed	= 1,
 };
 
 EXPORT_SYMBOL_GPL(tcp_death_row);
@@ -281,7 +290,8 @@ void tcp_time_wait(struct sock *sk, int 
 	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
 		recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);
 
-	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
+	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets &&
+			ub_timewait_check(sk, &tcp_death_row))
 		tw = inet_twsk_alloc(sk, state);
 
 	if (tw != NULL) {
@@ -294,6 +304,8 @@ void tcp_time_wait(struct sock *sk, int 
 		tcptw->tw_rcv_wnd	= tcp_receive_window(tp);
 		tcptw->tw_ts_recent	= tp->rx_opt.ts_recent;
 		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
+		if (sk->sk_user_data != NULL)
+			tw->tw_rcv_wscale |= TW_WSCALE_SPEC;
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		if (tw->tw_family == PF_INET6) {
@@ -307,6 +319,8 @@ void tcp_time_wait(struct sock *sk, int 
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
+		tw->tw_owner_env = VEID(sk->owner_env);
+
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
 
@@ -355,6 +369,8 @@ struct sock *tcp_create_openreq_child(st
 		struct tcp_sock *newtp;
 
 		/* Now setup tcp_sock */
+		newsk->owner_env = sk->owner_env;
+
 		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
 		newtp->rcv_nxt = treq->rcv_isn + 1;
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/tcp_output.c linux-2.6.16.46-0.12-027test011/net/ipv4/tcp_output.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/tcp_output.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/tcp_output.c	2007-08-28 17:35:30.000000000 +0400
@@ -42,6 +42,9 @@
 #include <linux/module.h>
 #include <linux/smp_lock.h>
 
+#include <ub/ub_net.h>
+#include <ub/ub_tcp.h>
+
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse = 1;
 
@@ -320,6 +323,13 @@ static void tcp_syn_build_options(__u32 
 		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
 }
 
+static int skb_header_size(struct sock *sk, int tcp_hlen)
+{
+	struct ip_options *opt = inet_sk(sk)->opt;
+	int ip_hlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
+	return ETH_HLEN + ip_hlen + tcp_hlen;	/* hard header + IP + TCP */
+}
+
 /* This routine actually transmits TCP packets queued in by
  * tcp_do_sendmsg().  This is used by both the initial
  * transmission and possible later retransmissions.
@@ -338,6 +348,7 @@ static int tcp_transmit_skb(struct sock 
 	struct tcp_sock *tp;
 	struct tcp_skb_cb *tcb;
 	int tcp_header_size;
+	int header_size;
 	struct tcphdr *th;
 	int sysctl_flags;
 	int err;
@@ -392,7 +403,21 @@ static int tcp_transmit_skb(struct sock 
 				    (tp->rx_opt.eff_sacks *
 				     TCPOLEN_SACK_PERBLOCK));
 	}
-		
+
+	/* Unfortunately, an skb from the outside world may arrive here
+	 * with too little headroom for the headers.  That cannot be
+	 * guessed when the skb is queued, so decide here.  -- Den
+	 * (Headroom is compared directly; skb->data is not moved back.)
+	 */
+	header_size = skb_header_size(sk, tcp_header_size);
+	if (skb_headroom(skb) < header_size) {
+		int delta = header_size - skb_headroom(skb);
+		err = pskb_expand_head(skb, SKB_DATA_ALIGN(delta),
+				0, GFP_ATOMIC);
+		if (err)
+			return err;
+	}
+
 	if (tcp_packets_in_flight(tp) == 0)
 		tcp_ca_event(sk, CA_EVENT_TX_START);
 
@@ -530,15 +555,23 @@ int tcp_fragment(struct sock *sk, struct
 	if (nsize < 0)
 		nsize = 0;
 
-	if (skb_cloned(skb) &&
-	    skb_is_nonlinear(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-		return -ENOMEM;
+	if (skb_cloned(skb) && skb_is_nonlinear(skb)) {
+		unsigned long chargesize;
+		chargesize = skb_bc(skb)->charged;
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+			return -ENOMEM;
+		ub_sock_tcp_unchargesend(sk, chargesize);
+		ub_tcpsndbuf_charge_forced(sk, skb);
+	}
 
 	/* Get a new skb... force flag on. */
 	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
 	if (buff == NULL)
 		return -ENOMEM; /* We'll just try again later. */
+	if (ub_tcpsndbuf_charge(sk, buff) < 0) {
+		kfree_skb(buff);
+		return -ENOMEM;
+	}
 
 	buff->truesize = skb->len - len;
 	skb->truesize -= buff->truesize;
@@ -983,6 +1016,11 @@ static int tso_fragment(struct sock *sk,
 	if (unlikely(buff == NULL))
 		return -ENOMEM;
 
+	if (ub_tcpsndbuf_charge(sk, buff) < 0) {
+		kfree_skb(buff);
+		return -ENOMEM;
+	}
+
 	buff->truesize = nlen;
 	skb->truesize -= nlen;
 
@@ -1286,7 +1324,7 @@ u32 __tcp_select_window(struct sock *sk)
 	if (free_space < full_space/2) {
 		icsk->icsk_ack.quick = 0;
 
-		if (tcp_memory_pressure)
+		if (ub_tcp_shrink_rcvbuf(sk))
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
 
 		if (free_space < mss)
@@ -1714,6 +1752,7 @@ void tcp_send_fin(struct sock *sk)
 				break;
 			yield();
 		}
+		ub_tcpsndbuf_charge_forced(sk, skb);
 
 		/* Reserve space for headers and prepare control bits. */
 		skb_reserve(skb, MAX_TCP_HEADER);
@@ -1785,6 +1824,10 @@ int tcp_send_synack(struct sock *sk)
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
 				return -ENOMEM;
+			if (ub_tcpsndbuf_charge(sk, nskb) < 0) {
+				kfree_skb(nskb);
+				return -ENOMEM;	/* the queued copy is what gets charged */
+			}
 			__skb_unlink(skb, &sk->sk_write_queue);
 			skb_header_release(nskb);
 			__skb_queue_head(&sk->sk_write_queue, nskb);
@@ -1881,6 +1924,7 @@ static void tcp_connect_init(struct sock
 	struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u8 rcv_wscale;
+	static int once = 0;
 
 	/* We'll fix this up when we get a response from the other end.
 	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
@@ -1894,9 +1938,23 @@ static void tcp_connect_init(struct sock
 	tp->max_window = 0;
 	tcp_sync_mss(sk, dst_mtu(dst));
 
+	if (!once && dst_metric(dst, RTAX_ADVMSS) == 0) {
+		once = 1;	/* report a zero route advmss one time only */
+		printk(KERN_WARNING "Oops in connect_init! dst->advmss=%u\n",
+						dst_metric(dst, RTAX_ADVMSS));
+		printk(KERN_WARNING "dst: pmtu=%u\n",
+						dst_metric(dst, RTAX_MTU));
+		printk(KERN_WARNING "sk->state=%d, tp: ack.rcv_mss=%d, "
+				"mss_cache=%d, advmss=%d, user_mss=%d\n",
+				sk->sk_state, inet_csk(sk)->icsk_ack.rcv_mss,
+				tp->mss_cache, tp->advmss, tp->rx_opt.user_mss);
+	}
+
 	if (!tp->window_clamp)
 		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
 	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	if (tp->advmss == 0)
+		tp->advmss = 1460;
 	tcp_initialize_rcv_mss(sk);
 
 	tcp_select_initial_window(tcp_full_space(sk),
@@ -1937,6 +1995,10 @@ int tcp_connect(struct sock *sk)
 	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
 	if (unlikely(buff == NULL))
 		return -ENOBUFS;
+	if (ub_tcpsndbuf_charge(sk, buff) < 0) {
+		kfree_skb(buff);
+		return -ENOBUFS;
+	}
 
 	/* Reserve space for headers. */
 	skb_reserve(buff, MAX_TCP_HEADER);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/tcp_timer.c linux-2.6.16.46-0.12-027test011/net/ipv4/tcp_timer.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/tcp_timer.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/tcp_timer.c	2007-08-28 17:35:32.000000000 +0400
@@ -22,6 +22,8 @@
 
 #include <linux/module.h>
 #include <net/tcp.h>
+#include <ub/ub_orphan.h>
+#include <ub/ub_tcp.h>
 
 int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 
 int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 
@@ -67,7 +69,7 @@ static void tcp_write_err(struct sock *s
 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int orphans = atomic_read(&tcp_orphan_count);
+	int orphans = ub_get_orphan_count(sk);
 
 	/* If peer does not open window for long time, or did not transmit 
 	 * anything for long time, penalize it. */
@@ -78,9 +80,7 @@ static int tcp_out_of_resources(struct s
 	if (sk->sk_err_soft)
 		orphans <<= 1;
 
-	if (orphans >= sysctl_tcp_max_orphans ||
-	    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-	     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
+	if (ub_too_many_orphans(sk, orphans)) {
 		if (net_ratelimit())
 			printk(KERN_INFO "Out of socket memory\n");
 
@@ -173,9 +173,12 @@ static int tcp_write_timeout(struct sock
 static void tcp_delack_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock*)data;
+	struct ve_struct *env;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	env = set_exec_env(sk->owner_env);
+
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
 		/* Try again later. */
@@ -224,11 +227,12 @@ static void tcp_delack_timer(unsigned lo
 	TCP_CHECK_TIMER(sk);
 
 out:
-	if (tcp_memory_pressure)
+	if (ub_tcp_memory_pressure(sk))
 		sk_stream_mem_reclaim(sk);
 out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
 
 static void tcp_probe_timer(struct sock *sk)
@@ -283,8 +287,11 @@ static void tcp_probe_timer(struct sock 
 static void tcp_retransmit_timer(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct ve_struct *env;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	env = set_exec_env(sk->owner_env);
+
 	if (!tp->packets_out)
 		goto out;
 
@@ -381,15 +388,19 @@ out_reset_timer:
 	if (icsk->icsk_retransmits > sysctl_tcp_retries1)
 		__sk_dst_reset(sk);
 
-out:;
+out:
+	(void)set_exec_env(env);
 }
 
 static void tcp_write_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock*)data;
+	struct ve_struct *env;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int event;
 
+	env = set_exec_env(sk->owner_env);
+
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
 		/* Try again later */
@@ -423,6 +434,7 @@ out:
 out_unlock:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
 
 /*
@@ -450,10 +462,13 @@ void tcp_set_keepalive(struct sock *sk, 
 static void tcp_keepalive_timer (unsigned long data)
 {
 	struct sock *sk = (struct sock *) data;
+	struct ve_struct *env;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 elapsed;
 
+	env = set_exec_env(sk->owner_env);
+
 	/* Only process if socket is not in use. */
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
@@ -525,4 +540,5 @@ death:	
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
+	(void)set_exec_env(env);
 }
diff -upr linux-2.6.16.46-0.12.orig/net/ipv4/udp.c linux-2.6.16.46-0.12-027test011/net/ipv4/udp.c
--- linux-2.6.16.46-0.12.orig/net/ipv4/udp.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv4/udp.c	2007-08-28 17:35:32.000000000 +0400
@@ -127,7 +127,9 @@ static int udp_v4_get_port(struct sock *
 	struct hlist_node *node;
 	struct sock *sk2;
 	struct inet_sock *inet = inet_sk(sk);
+	struct ve_struct *env;
 
+	env = sk->owner_env;
 	write_lock_bh(&udp_hash_lock);
 	if (snum == 0) {
 		int best_size_so_far, best, result, i;
@@ -141,7 +143,7 @@ static int udp_v4_get_port(struct sock *
 			struct hlist_head *list;
 			int size;
 
-			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+			list = &udp_hash[udp_hashfn(result, VEID(env))];
 			if (hlist_empty(list)) {
 				if (result > sysctl_local_port_range[1])
 					result = sysctl_local_port_range[0] +
@@ -163,7 +165,7 @@ static int udp_v4_get_port(struct sock *
 				result = sysctl_local_port_range[0]
 					+ ((result - sysctl_local_port_range[0]) &
 					   (UDP_HTABLE_SIZE - 1));
-			if (!udp_lport_inuse(result))
+			if (!udp_lport_inuse(result, env))
 				break;
 		}
 		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -172,11 +174,12 @@ gotit:
 		udp_port_rover = snum = result;
 	} else {
 		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
+			    &udp_hash[udp_hashfn(snum, VEID(env))]) {
 			struct inet_sock *inet2 = inet_sk(sk2);
 
 			if (inet2->num == snum &&
 			    sk2 != sk &&
+			    ve_accessible_strict(sk2->owner_env, env) &&
 			    !ipv6_only_sock(sk2) &&
 			    (!sk2->sk_bound_dev_if ||
 			     !sk->sk_bound_dev_if ||
@@ -190,7 +193,7 @@ gotit:
 	}
 	inet->num = snum;
 	if (sk_unhashed(sk)) {
-		struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+		struct hlist_head *h = &udp_hash[udp_hashfn(snum, VEID(env))];
 
 		sk_add_node(sk, h);
 		sock_prot_inc_use(sk->sk_prot);
@@ -228,11 +231,15 @@ static struct sock *udp_v4_lookup_longwa
 	struct hlist_node *node;
 	unsigned short hnum = ntohs(dport);
 	int badness = -1;
+	struct ve_struct *env;
 
-	sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+	env = get_exec_env();
+	sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (inet->num == hnum && !ipv6_only_sock(sk)) {
+		if (inet->num == hnum &&
+		    ve_accessible_strict(sk->owner_env, env) &&
+		    !ipv6_only_sock(sk)) {
 			int score = (sk->sk_family == PF_INET ? 1 : 0);
 			if (inet->rcv_saddr) {
 				if (inet->rcv_saddr != daddr)
@@ -1058,7 +1065,8 @@ static int udp_v4_mcast_deliver(struct s
 	int dif;
 
 	read_lock(&udp_hash_lock);
-	sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest),
+				VEID(skb->owner_env))]);
 	dif = skb->dev->ifindex;
 	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (sk) {
@@ -1377,10 +1385,14 @@ static struct sock *udp_get_first(struct
 {
 	struct sock *sk;
 	struct udp_iter_state *state = seq->private;
+	struct ve_struct *env;
 
+	env = get_exec_env();
 	for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
 		struct hlist_node *node;
 		sk_for_each(sk, node, &udp_hash[state->bucket]) {
+			if (!ve_accessible(sk->owner_env, env))
+				continue;
 			if (sk->sk_family == state->family)
 				goto found;
 		}
@@ -1397,8 +1409,13 @@ static struct sock *udp_get_next(struct 
 	do {
 		sk = sk_next(sk);
 try_again:
-		;
-	} while (sk && sk->sk_family != state->family);
+		if (!sk)
+			break;
+		if (sk->sk_family != state->family)
+			continue;
+		if (ve_accessible(sk->owner_env, get_exec_env()))
+			break;
+	} while (1);
 
 	if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
 		sk = sk_head(&udp_hash[state->bucket]);
@@ -1484,7 +1501,7 @@ int udp_proc_register(struct udp_seq_afi
 	afinfo->seq_fops->llseek	= seq_lseek;
 	afinfo->seq_fops->release	= seq_release_private;
 
-	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
+	p = proc_glob_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
 	if (p)
 		p->data = afinfo;
 	else
@@ -1496,7 +1513,8 @@ void udp_proc_unregister(struct udp_seq_
 {
 	if (!afinfo)
 		return;
-	proc_net_remove(afinfo->name);
+
+	remove_proc_glob_entry(afinfo->name, NULL);
 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
 }
 
@@ -1539,7 +1557,7 @@ static int udp4_seq_show(struct seq_file
 static struct file_operations udp4_seq_fops;
 static struct udp_seq_afinfo udp4_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "udp",
+	.name		= "net/udp",
 	.family		= AF_INET,
 	.seq_show	= udp4_seq_show,
 	.seq_fops	= &udp4_seq_fops,
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/addrconf.c linux-2.6.16.46-0.12-027test011/net/ipv6/addrconf.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/addrconf.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/addrconf.c	2007-08-28 17:35:34.000000000 +0400
@@ -100,6 +100,7 @@
 #define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
 
 #ifdef CONFIG_SYSCTL
+static struct addrconf_sysctl_table * __addrconf_sysctl_register(struct inet6_dev *idev, char *devname, int ifindex, struct ipv6_devconf *p);
 static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
 static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
 #endif
@@ -133,8 +134,6 @@ static DEFINE_SPINLOCK(addrconf_verify_l
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
 
-static int addrconf_ifdown(struct net_device *dev, int how);
-
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
 static void addrconf_dad_timer(unsigned long data);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
@@ -149,7 +148,7 @@ static int ipv6_chk_same_addr(const stru
 
 static struct notifier_block *inet6addr_chain;
 
-struct ipv6_devconf ipv6_devconf = {
+struct ipv6_devconf global_ipv6_devconf = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -171,7 +170,7 @@ struct ipv6_devconf ipv6_devconf = {
 	.max_addresses		= IPV6_MAX_ADDRESSES,
 };
 
-static struct ipv6_devconf ipv6_devconf_dflt = {
+struct ipv6_devconf global_ipv6_devconf_dflt = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -192,6 +191,12 @@ static struct ipv6_devconf ipv6_devconf_
 	.max_addresses		= IPV6_MAX_ADDRESSES,
 };
 
+#ifdef CONFIG_VE
+#define ipv6_devconf_dflt	(*(get_exec_env()->_ipv6_devconf_dflt))
+#else
+#define ipv6_devconf_dflt	global_ipv6_devconf_dflt
+#endif
+
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
 #if 0
 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
@@ -381,7 +386,7 @@ static struct inet6_dev * ipv6_add_dev(s
 		    dev->type == ARPHRD_TUNNEL ||
 		    dev->type == ARPHRD_NONE ||
 		    dev->type == ARPHRD_SIT) {
-			printk(KERN_INFO
+			ve_printk(VE_LOG, KERN_INFO
 			       "%s: Disabled Privacy Extensions\n",
 			       dev->name);
 			ndev->cnf.use_tempaddr = -1;
@@ -469,8 +474,8 @@ static void addrconf_forward_change(void
 		read_lock(&addrconf_lock);
 		idev = __in6_dev_get(dev);
 		if (idev) {
-			int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
-			idev->cnf.forwarding = ipv6_devconf.forwarding;
+			int changed = (!idev->cnf.forwarding) ^ (!ve_ipv6_devconf.forwarding);
+			idev->cnf.forwarding = ve_ipv6_devconf.forwarding;
 			if (changed)
 				dev_forward_change(idev);
 		}
@@ -1163,9 +1168,10 @@ int ipv6_chk_addr(struct in6_addr *addr,
 	read_lock_bh(&addrconf_hash_lock);
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
-		    !(ifp->flags&IFA_F_TENTATIVE)) {
+		    !(ifp->flags&IFA_F_TENTATIVE) &&
+		    ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env())) {
 			if (dev == NULL || ifp->idev->dev == dev ||
-			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))
+			    !((ifp->scope&(IFA_LINK|IFA_HOST)) || strict))
 				break;
 		}
 	}
@@ -1181,7 +1187,9 @@ int ipv6_chk_same_addr(const struct in6_
 
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
-			if (dev == NULL || ifp->idev->dev == dev)
+			if ((dev == NULL &&
+			     ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env()))
+			    || ifp->idev->dev == dev)
 				break;
 		}
 	}
@@ -1195,9 +1203,10 @@ struct inet6_ifaddr * ipv6_get_ifaddr(st
 
 	read_lock_bh(&addrconf_hash_lock);
 	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
-		if (ipv6_addr_equal(&ifp->addr, addr)) {
+		if (ipv6_addr_equal(&ifp->addr, addr) &&
+		    ve_accessible_strict(ifp->idev->dev->owner_env, get_exec_env())) {
 			if (dev == NULL || ifp->idev->dev == dev ||
-			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
+			    !((ifp->scope&(IFA_LINK|IFA_HOST)) || strict)) {
 				in6_ifa_hold(ifp);
 				break;
 			}
@@ -1399,12 +1408,8 @@ static int __ipv6_regen_rndid(struct ine
 
 	dev = idev->dev;
 
-	if (ipv6_generate_eui64(idev->work_eui64, dev)) {
-		printk(KERN_INFO
-			"__ipv6_regen_rndid(idev=%p): cannot get EUI64 identifier; use random bytes.\n",
-			idev);
+	if (ipv6_generate_eui64(idev->work_eui64, dev))
 		get_random_bytes(idev->work_eui64, sizeof(idev->work_eui64));
-	}
 regen:
 	spin_lock(&md5_tfm_lock);
 	if (unlikely(md5_tfm == NULL)) {
@@ -1857,7 +1862,7 @@ err_exit:
 /*
  *	Manual configuration of address on an interface
  */
-static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen)
+int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen)
 {
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
@@ -1886,6 +1891,7 @@ static int inet6_addr_add(int ifindex, s
 
 	return PTR_ERR(ifp);
 }
+EXPORT_SYMBOL_GPL(inet6_addr_add);
 
 static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
 {
@@ -1926,7 +1932,7 @@ int addrconf_add_ifaddr(void __user *arg
 	struct in6_ifreq ireq;
 	int err;
 	
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 	
 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -1943,7 +1949,7 @@ int addrconf_del_ifaddr(void __user *arg
 	struct in6_ifreq ireq;
 	int err;
 	
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -2285,7 +2291,7 @@ static struct notifier_block ipv6_dev_no
 	.priority = 0
 };
 
-static int addrconf_ifdown(struct net_device *dev, int how)
+int addrconf_ifdown(struct net_device *dev, int how)
 {
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa, **bifa;
@@ -2293,7 +2299,7 @@ static int addrconf_ifdown(struct net_de
 
 	ASSERT_RTNL();
 
-	if (dev == &loopback_dev && how == 1)
+	if (dev == get_ve0()->_loopback_dev && how == 1)
 		how = 0;
 
 	rt6_ifdown(dev);
@@ -2401,10 +2407,12 @@ static int addrconf_ifdown(struct net_de
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(addrconf_ifdown);
 
 static void addrconf_rs_timer(unsigned long data)
 {
 	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+	struct ve_struct *old_env = set_exec_env(ifp->idev->dev->owner_env);
 
 	if (ifp->idev->cnf.forwarding)
 		goto out;
@@ -2443,6 +2451,7 @@ static void addrconf_rs_timer(unsigned l
 
 out:
 	in6_ifa_put(ifp);
+	set_exec_env(old_env);
 }
 
 /*
@@ -2511,6 +2520,7 @@ static void addrconf_dad_timer(unsigned 
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr unspec;
 	struct in6_addr mcaddr;
+	struct ve_struct *old_env = set_exec_env(ifp->idev->dev->owner_env);
 
 	read_lock_bh(&idev->lock);
 	if (idev->dead) {
@@ -2543,6 +2553,7 @@ static void addrconf_dad_timer(unsigned 
 	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
 out:
 	in6_ifa_put(ifp);
+	set_exec_env(old_env);
 }
 
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
@@ -2611,8 +2622,11 @@ static struct inet6_ifaddr *if6_get_firs
 
 	for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
 		ifa = inet6_addr_lst[state->bucket];
-		if (ifa)
-			break;
+		while (ifa) {
+			if (ve_accessible_strict(ifa->idev->dev->owner_env, get_exec_env()))
+				return ifa;
+			ifa = ifa->lst_next;
+		}
 	}
 	return ifa;
 }
@@ -2623,6 +2637,11 @@ static struct inet6_ifaddr *if6_get_next
 
 	ifa = ifa->lst_next;
 try_again:
+	while (ifa) {
+		if (ve_accessible_strict(ifa->idev->dev->owner_env, get_exec_env()))
+			break;
+		ifa = ifa->lst_next;
+	}
 	if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
 		ifa = inet6_addr_lst[state->bucket];
 		goto try_again;
@@ -2714,14 +2733,14 @@ static struct file_operations if6_fops =
 
 int __init if6_proc_init(void)
 {
-	if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
+	if (!proc_glob_fops_create("net/if_inet6", S_IRUGO, &if6_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void if6_proc_exit(void)
 {
-	proc_net_remove("if_inet6");
+	remove_proc_glob_entry("net/if_inet6", NULL);
 }
 #endif	/* CONFIG_PROC_FS */
 
@@ -2734,6 +2753,7 @@ static void addrconf_verify(unsigned lon
 	struct inet6_ifaddr *ifp;
 	unsigned long now, next;
 	int i;
+	struct ve_struct *old_env;
 
 	spin_lock_bh(&addrconf_verify_lock);
 	now = jiffies;
@@ -2754,6 +2774,8 @@ restart:
 			if (ifp->flags & IFA_F_PERMANENT)
 				continue;
 
+			old_env = set_exec_env(ifp->idev->dev->owner_env);
+
 			spin_lock(&ifp->lock);
 			age = (now - ifp->tstamp) / HZ;
 
@@ -2768,6 +2790,7 @@ restart:
 				in6_ifa_hold(ifp);
 				read_unlock(&addrconf_hash_lock);
 				ipv6_del_addr(ifp);
+				set_exec_env(old_env);
 				goto restart;
 			} else if (age >= ifp->prefered_lft) {
 				/* jiffies - ifp->tsamp > age >= ifp->prefered_lft */
@@ -2789,6 +2812,7 @@ restart:
 
 					ipv6_ifa_notify(0, ifp);
 					in6_ifa_put(ifp);
+					set_exec_env(old_env);
 					goto restart;
 				}
 #ifdef CONFIG_IPV6_PRIVACY
@@ -2810,6 +2834,7 @@ restart:
 						ipv6_create_tempaddr(ifpub, ifp);
 						in6_ifa_put(ifpub);
 						in6_ifa_put(ifp);
+						set_exec_env(old_env);
 						goto restart;
 					}
 				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
@@ -2822,6 +2847,7 @@ restart:
 					next = ifp->tstamp + ifp->prefered_lft * HZ;
 				spin_unlock(&ifp->lock);
 			}
+			set_exec_env(old_env);
 		}
 		read_unlock(&addrconf_hash_lock);
 	}
@@ -3397,7 +3423,7 @@ int addrconf_sysctl_forward(ctl_table *c
 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
 	if (write && valp != &ipv6_devconf_dflt.forwarding) {
-		if (valp != &ipv6_devconf.forwarding) {
+		if (valp != &ve_ipv6_devconf.forwarding) {
 			if ((!*valp) ^ (!val)) {
 				struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
 				if (idev == NULL)
@@ -3405,7 +3431,7 @@ int addrconf_sysctl_forward(ctl_table *c
 				dev_forward_change(idev);
 			}
 		} else {
-			ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
+			ipv6_devconf_dflt.forwarding = ve_ipv6_devconf.forwarding;
 			addrconf_forward_change();
 		}
 		if (*valp)
@@ -3448,7 +3474,7 @@ static int addrconf_sysctl_forward_strat
 	}
 
 	if (valp != &ipv6_devconf_dflt.forwarding) {
-		if (valp != &ipv6_devconf.forwarding) {
+		if (valp != &ve_ipv6_devconf.forwarding) {
 			struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
 			int changed;
 			if (unlikely(idev == NULL))
@@ -3484,7 +3510,7 @@ static struct addrconf_sysctl_table
         	{
 			.ctl_name	=	NET_IPV6_FORWARDING,
 			.procname	=	"forwarding",
-         		.data		=	&ipv6_devconf.forwarding,
+         		.data		=	&global_ipv6_devconf.forwarding,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&addrconf_sysctl_forward,
@@ -3493,7 +3519,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_HOP_LIMIT,
 			.procname	=	"hop_limit",
-         		.data		=	&ipv6_devconf.hop_limit,
+         		.data		=	&global_ipv6_devconf.hop_limit,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 			.proc_handler	=	proc_dointvec,
@@ -3501,7 +3527,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_MTU,
 			.procname	=	"mtu",
-			.data		=	&ipv6_devconf.mtu6,
+			.data		=	&global_ipv6_devconf.mtu6,
          		.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3509,7 +3535,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_ACCEPT_RA,
 			.procname	=	"accept_ra",
-         		.data		=	&ipv6_devconf.accept_ra,
+         		.data		=	&global_ipv6_devconf.accept_ra,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3517,7 +3543,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_ACCEPT_REDIRECTS,
 			.procname	=	"accept_redirects",
-         		.data		=	&ipv6_devconf.accept_redirects,
+         		.data		=	&global_ipv6_devconf.accept_redirects,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3525,7 +3551,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_AUTOCONF,
 			.procname	=	"autoconf",
-         		.data		=	&ipv6_devconf.autoconf,
+         		.data		=	&global_ipv6_devconf.autoconf,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3533,7 +3559,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_DAD_TRANSMITS,
 			.procname	=	"dad_transmits",
-         		.data		=	&ipv6_devconf.dad_transmits,
+         		.data		=	&global_ipv6_devconf.dad_transmits,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3541,7 +3567,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_RTR_SOLICITS,
 			.procname	=	"router_solicitations",
-         		.data		=	&ipv6_devconf.rtr_solicits,
+         		.data		=	&global_ipv6_devconf.rtr_solicits,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3549,7 +3575,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_RTR_SOLICIT_INTERVAL,
 			.procname	=	"router_solicitation_interval",
-         		.data		=	&ipv6_devconf.rtr_solicit_interval,
+         		.data		=	&global_ipv6_devconf.rtr_solicit_interval,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec_jiffies,
@@ -3558,7 +3584,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_RTR_SOLICIT_DELAY,
 			.procname	=	"router_solicitation_delay",
-         		.data		=	&ipv6_devconf.rtr_solicit_delay,
+         		.data		=	&global_ipv6_devconf.rtr_solicit_delay,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec_jiffies,
@@ -3567,7 +3593,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_FORCE_MLD_VERSION,
 			.procname	=	"force_mld_version",
-         		.data		=	&ipv6_devconf.force_mld_version,
+         		.data		=	&global_ipv6_devconf.force_mld_version,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
          		.proc_handler	=	&proc_dointvec,
@@ -3576,7 +3602,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_USE_TEMPADDR,
 			.procname	=	"use_tempaddr",
-	 		.data		=	&ipv6_devconf.use_tempaddr,
+	 		.data		=	&global_ipv6_devconf.use_tempaddr,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -3584,7 +3610,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_TEMP_VALID_LFT,
 			.procname	=	"temp_valid_lft",
-	 		.data		=	&ipv6_devconf.temp_valid_lft,
+	 		.data		=	&global_ipv6_devconf.temp_valid_lft,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -3592,7 +3618,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_TEMP_PREFERED_LFT,
 			.procname	=	"temp_prefered_lft",
-	 		.data		=	&ipv6_devconf.temp_prefered_lft,
+	 		.data		=	&global_ipv6_devconf.temp_prefered_lft,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -3600,7 +3626,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_REGEN_MAX_RETRY,
 			.procname	=	"regen_max_retry",
-	 		.data		=	&ipv6_devconf.regen_max_retry,
+	 		.data		=	&global_ipv6_devconf.regen_max_retry,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -3608,7 +3634,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_MAX_DESYNC_FACTOR,
 			.procname	=	"max_desync_factor",
-	 		.data		=	&ipv6_devconf.max_desync_factor,
+	 		.data		=	&global_ipv6_devconf.max_desync_factor,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 	 		.proc_handler	=	&proc_dointvec,
@@ -3617,7 +3643,7 @@ static struct addrconf_sysctl_table
 		{
 			.ctl_name	=	NET_IPV6_MAX_ADDRESSES,
 			.procname	=	"max_addresses",
-			.data		=	&ipv6_devconf.max_addresses,
+			.data		=	&global_ipv6_devconf.max_addresses,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
 			.proc_handler	=	&proc_dointvec,
@@ -3672,29 +3698,22 @@ static struct addrconf_sysctl_table
 	},
 };
 
-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+static struct addrconf_sysctl_table *
+__addrconf_sysctl_register(struct inet6_dev *idev, char *dev_name, int ifindex, struct ipv6_devconf *p)
 {
 	int i;
-	struct net_device *dev = idev ? idev->dev : NULL;
 	struct addrconf_sysctl_table *t;
-	char *dev_name = NULL;
 
 	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (t == NULL)
-		return;
+		return NULL;
+
 	memcpy(t, &addrconf_sysctl, sizeof(*t));
 	for (i=0; t->addrconf_vars[i].data; i++) {
-		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
+		t->addrconf_vars[i].data += (char*)p - (char*)&global_ipv6_devconf;
 		t->addrconf_vars[i].de = NULL;
 		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
 	}
-	if (dev) {
-		dev_name = dev->name; 
-		t->addrconf_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-	}
 
 	/* 
 	 * Make a copy of dev_name, because '.procname' is regarded as const 
@@ -3705,6 +3724,7 @@ static void addrconf_sysctl_register(str
 	if (!dev_name)
 	    goto free;
 
+	t->addrconf_dev[0].ctl_name = ifindex;
 	t->addrconf_dev[0].procname = dev_name;
 
 	t->addrconf_dev[0].child = t->addrconf_vars;
@@ -3719,9 +3739,7 @@ static void addrconf_sysctl_register(str
 	t->sysctl_header = register_sysctl_table(t->addrconf_root_dir, 0);
 	if (t->sysctl_header == NULL)
 		goto free_procname;
-	else
-		p->sysctl = t;
-	return;
+	return t;	/* success: the table is handed to the caller, which must record it itself (p->sysctl is no longer set here) */
 
 	/* error path */
  free_procname:
@@ -3729,7 +3747,26 @@ static void addrconf_sysctl_register(str
  free:
 	kfree(t);
 
-	return;
+	return NULL;
+}
+
+static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+{
+	/* With no inet6_dev (or no device behind it) use the "default" entry. */
+	struct net_device *netdev = idev ? idev->dev : NULL;
+	char *label = "default";
+	int ctl_id = NET_PROTO_CONF_DEFAULT;
+
+	/* A real device is listed under its own name and interface index. */
+	if (netdev) {
+		label = netdev->name;
+		ctl_id = netdev->ifindex;
+	}
+
+	/*
+	 * Record the table returned by the helper (NULL on failure) in *p.
+	 */
+	p->sysctl = __addrconf_sysctl_register(idev, label, ctl_id, p);
+}
 
 static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
@@ -3743,9 +3780,64 @@ static void addrconf_sysctl_unregister(s
 	}
 }
 
+#ifdef CONFIG_VE
+int addrconf_sysctl_init(struct ve_struct *ve)	/* give a VE its own "all" and "default" devconf copies with sysctl tables; returns 0 or -ENOMEM */
+{
+	int err = 0;
+	struct ipv6_devconf *conf, *conf_def;
 
-#endif
+	err = -ENOMEM;	/* the only error value the failure paths below report */
 
+	conf = kmalloc(sizeof(*conf), GFP_KERNEL);
+	if (!conf)
+		goto err1;
+
+	memcpy(conf, &global_ipv6_devconf, sizeof(*conf));	/* start from the global settings */
+	conf->sysctl = __addrconf_sysctl_register(NULL, "all",
+			NET_PROTO_CONF_ALL, conf);
+	if (!conf->sysctl)
+		goto err2;
+
+	conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL);
+	if (!conf_def)
+		goto err3;
+
+	memcpy(conf_def, &global_ipv6_devconf_dflt, sizeof(*conf_def));	/* start from the global defaults */
+	conf_def->sysctl = __addrconf_sysctl_register(NULL, "default",
+			NET_PROTO_CONF_DEFAULT, conf_def);
+	if (!conf_def->sysctl)
+		goto err4;
+
+	ve->_ipv6_devconf = conf;	/* stored in the VE only after both registrations succeeded */
+	ve->_ipv6_devconf_dflt = conf_def;
+	return 0;
+
+err4:
+	kfree(conf_def);
+err3:
+	addrconf_sysctl_unregister(conf);	/* undo the "all" registration made above */
+err2:
+	kfree(conf);
+err1:
+	return err;
+}
+EXPORT_SYMBOL(addrconf_sysctl_init);
+
+void addrconf_sysctl_fini(struct ve_struct *ve)	/* calls addrconf_sysctl_unregister() on both per-VE devconf blocks; does not free them */
+{
+	addrconf_sysctl_unregister(ve->_ipv6_devconf);
+	addrconf_sysctl_unregister(ve->_ipv6_devconf_dflt);
+}
+EXPORT_SYMBOL(addrconf_sysctl_fini);
+
+void addrconf_sysctl_free(struct ve_struct *ve)	/* kfree() both per-VE devconf blocks; the pointers in ve are left as they are */
+{
+	kfree(ve->_ipv6_devconf);
+	kfree(ve->_ipv6_devconf_dflt);
+}
+EXPORT_SYMBOL(addrconf_sysctl_free);
+#endif /* CONFIG_VE */
+#endif /* CONFIG_SYSCTL */
 /*
  *      Device notifier
  */
@@ -3768,6 +3860,11 @@ int __init addrconf_init(void)
 {
 	int err = 0;
 
+#ifdef CONFIG_VE
+	get_ve0()->_ipv6_devconf = &global_ipv6_devconf;
+	get_ve0()->_ipv6_devconf_dflt = &global_ipv6_devconf_dflt;
+#endif
+
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
 	 * before it can bring up and give link-local addresses
@@ -3809,7 +3906,7 @@ int __init addrconf_init(void)
 #ifdef CONFIG_SYSCTL
 	addrconf_sysctl.sysctl_header =
 		register_sysctl_table(addrconf_sysctl.addrconf_root_dir, 0);
-	addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
+	addrconf_sysctl_register(NULL, &global_ipv6_devconf_dflt);	/* registers "default" and stores the table in ->sysctl, so addrconf_cleanup() can unregister it */
 #endif
 
 	return 0;
@@ -3826,8 +3923,8 @@ void __exit addrconf_cleanup(void)
 
 	rtnetlink_links[PF_INET6] = NULL;
 #ifdef CONFIG_SYSCTL
-	addrconf_sysctl_unregister(&ipv6_devconf_dflt);
-	addrconf_sysctl_unregister(&ipv6_devconf);
+	addrconf_sysctl_unregister(&global_ipv6_devconf_dflt);
+	addrconf_sysctl_unregister(&global_ipv6_devconf);
 #endif
 
 	rtnl_lock();
@@ -3872,6 +3969,6 @@ void __exit addrconf_cleanup(void)
 #endif
 
 #ifdef CONFIG_PROC_FS
-	proc_net_remove("if_inet6");
+	remove_proc_glob_entry("net/if_inet6", NULL);
 #endif
 }
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/af_inet6.c linux-2.6.16.46-0.12-027test011/net/ipv6/af_inet6.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/af_inet6.c	2007-08-24 19:28:13.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/af_inet6.c	2007-08-28 17:35:32.000000000 +0400
@@ -60,6 +60,7 @@
 #ifdef CONFIG_IPV6_TUNNEL
 #include <net/ip6_tunnel.h>
 #endif
+#include <ub/ub_net.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -143,6 +144,10 @@ lookup_protocol:
 			goto out_rcu_unlock;
 	}
 
+	err = vz_security_protocol_check(answer->protocol);
+	if (err < 0)
+		goto out_rcu_unlock;
+
 	err = -EPERM;
 	if (answer->capability > 0 && !capable(answer->capability))
 		goto out_rcu_unlock;
@@ -160,6 +165,13 @@ lookup_protocol:
 	if (sk == NULL)
 		goto out;
 
+	err = -ENOBUFS;
+	if (ub_sock_charge(sk, PF_INET6, sock->type))
+		goto out_sk_free;
+	/* Once the charge has succeeded, sock_init_data() MUST be called to
+	 * set sk->sk_type; otherwise sk will be uncharged from the wrong resource.
+	 */
+
 	sock_init_data(sock, sk);
 
 	err = 0;
@@ -234,6 +246,9 @@ out:
 out_rcu_unlock:
 	rcu_read_unlock();
 	goto out;
+out_sk_free:
+	sk_free(sk);
+	return err;
 }
 
 
@@ -715,21 +730,21 @@ snmp6_mib_free(void *ptr[2])
 
 static int __init init_ipv6_mibs(void)
 {
-	if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
+	if (snmp6_mib_init((void **)ve_ipv6_statistics, sizeof (struct ipstats_mib),
 			   __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
-	if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
+	if (snmp6_mib_init((void **)ve_icmpv6_statistics, sizeof (struct icmpv6_mib),
 			   __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp_mib;
-	if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
+	if (snmp6_mib_init((void **)ve_udp_stats_in6, sizeof (struct udp_mib),
 			   __alignof__(struct udp_mib)) < 0)
 		goto err_udp_mib;
 	return 0;
 
 err_udp_mib:
-	snmp6_mib_free((void **)icmpv6_statistics);
+	snmp6_mib_free((void **)ve_icmpv6_statistics);
 err_icmp_mib:
-	snmp6_mib_free((void **)ipv6_statistics);
+	snmp6_mib_free((void **)ve_ipv6_statistics);
 err_ip_mib:
 	return -ENOMEM;
 	
@@ -737,9 +752,9 @@ err_ip_mib:
 
 static void cleanup_ipv6_mibs(void)
 {
-	snmp6_mib_free((void **)ipv6_statistics);
-	snmp6_mib_free((void **)icmpv6_statistics);
-	snmp6_mib_free((void **)udp_stats_in6);
+	snmp6_mib_free((void **)ve_ipv6_statistics);
+	snmp6_mib_free((void **)ve_icmpv6_statistics);
+	snmp6_mib_free((void **)ve_udp_stats_in6);
 }
 
 static int __init inet6_init(void)
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/anycast.c linux-2.6.16.46-0.12-027test011/net/ipv6/anycast.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/anycast.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/anycast.c	2007-08-28 17:35:32.000000000 +0400
@@ -83,7 +83,7 @@ int ipv6_sock_ac_join(struct sock *sk, i
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev;
 	struct ipv6_ac_socklist *pac;
-	int	ishost = !ipv6_devconf.forwarding;
+	int	ishost = !ve_ipv6_devconf.forwarding;
 	int	err = 0;
 
 	if (!capable(CAP_NET_ADMIN))
@@ -455,6 +455,8 @@ static inline struct ifacaddr6 *ac6_get_
 	     state->dev;
 	     state->dev = state->dev->next) {
 		struct inet6_dev *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in6_dev_get(state->dev);
 		if (!idev)
 			continue;
@@ -485,6 +487,8 @@ static struct ifacaddr6 *ac6_get_next(st
 			state->idev = NULL;
 			break;
 		}
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		state->idev = in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
@@ -580,7 +584,7 @@ static struct file_operations ac6_seq_fo
 
 int __init ac6_proc_init(void)
 {
-	if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops))
+	if (!proc_glob_fops_create("net/anycast6", S_IRUGO, &ac6_seq_fops))
 		return -ENOMEM;
 
 	return 0;
@@ -588,7 +592,7 @@ int __init ac6_proc_init(void)
 
 void ac6_proc_exit(void)
 {
-	proc_net_remove("anycast6");
+	remove_proc_glob_entry("net/anycast6", NULL);
 }
 #endif
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/inet6_connection_sock.c linux-2.6.16.46-0.12-027test011/net/ipv6/inet6_connection_sock.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/inet6_connection_sock.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/inet6_connection_sock.c	2007-08-28 17:35:32.000000000 +0400
@@ -26,6 +26,8 @@
 #include <net/ip6_route.h>
 #include <net/sock.h>
 #include <net/inet6_connection_sock.h>
+#include <ub/ub_net.h>
+#include <ub/ub_orphan.h>
 
 int inet6_csk_bind_conflict(const struct sock *sk,
 			    const struct inet_bind_bucket *tb)
@@ -36,6 +38,7 @@ int inet6_csk_bind_conflict(const struct
 	/* We must walk the whole port owner list in this case. -DaveM */
 	sk_for_each_bound(sk2, node, &tb->owners) {
 		if (sk != sk2 &&
+		    ve_accessible_strict(sk->owner_env, sk2->owner_env) &&
 		    (!sk->sk_bound_dev_if ||
 		     !sk2->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/inet6_hashtables.c linux-2.6.16.46-0.12-027test011/net/ipv6/inet6_hashtables.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/inet6_hashtables.c	2007-08-24 19:28:10.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/inet6_hashtables.c	2007-08-28 17:35:32.000000000 +0400
@@ -31,9 +31,15 @@ struct sock *inet6_lookup_listener(struc
 	const struct hlist_node *node;
 	struct sock *result = NULL;
 	int score, hiscore = 0;
+	struct ve_struct *env;
+
+	env = get_exec_env();
 
 	read_lock(&hashinfo->lhash_lock);
-	sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
+	sk_for_each(sk, node, &hashinfo->listening_hash[
+			inet_lhashfn(hnum, VEID(env))]) {	/* the bucket is now chosen from the port and the VE id */
+		if (!ve_accessible_strict(sk->owner_env, env))	/* pass over listeners that ve_accessible_strict() rejects for the current env */
+			continue;
 		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
 			const struct ipv6_pinfo *np = inet6_sk(sk);
 			
@@ -84,7 +90,8 @@ EXPORT_SYMBOL_GPL(inet6_lookup);
 
 static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 				     struct sock *sk, const __u16 lport,
-				     struct inet_timewait_sock **twp)
+				     struct inet_timewait_sock **twp,
+				     struct ve_struct *ve)
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
@@ -94,7 +101,7 @@ static int __inet6_check_established(str
 	const int dif = sk->sk_bound_dev_if;
 	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
 	const unsigned int hash = inet6_ehashfn(daddr, lport, saddr,
-						inet->dport);
+						inet->dport, VEID(ve));
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	struct sock *sk2;
 	const struct hlist_node *node;
@@ -113,7 +120,8 @@ static int __inet6_check_established(str
 		   sk2->sk_family	       == PF_INET6	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
-		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
+		   ve_accessible_strict(tw->tw_owner_env, VEID(ve))) {
 			if (twsk_unique(sk, sk2, twp))
 				goto unique;
 			else
@@ -124,7 +132,7 @@ static int __inet6_check_established(str
 
 	/* And established part... */
 	sk_for_each(sk2, node, &head->chain) {
-		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
+		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif, ve))
 			goto not_unique;
 	}
 
@@ -173,7 +181,9 @@ int inet6_hash_connect(struct inet_timew
  	struct inet_bind_hashbucket *head;
  	struct inet_bind_bucket *tb;
 	int ret;
+	struct ve_struct *ve;
 
+	ve = sk->owner_env;
  	if (snum == 0) {
  		const int low = sysctl_local_port_range[0];
  		const int high = sysctl_local_port_range[1];
@@ -187,7 +197,8 @@ int inet6_hash_connect(struct inet_timew
  		local_bh_disable();
 		for (i = 1; i <= range; i++) {
 			port = low + (i + offset) % range;
- 			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ 			head = &hinfo->bhash[inet_bhashfn(port,
+					hinfo->bhash_size, VEID(ve))];
  			spin_lock(&head->lock);
 
  			/* Does not bother with rcv_saddr checks,
@@ -195,20 +206,21 @@ int inet6_hash_connect(struct inet_timew
  			 * unique enough.
  			 */
 			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == port) {
+ 				if (tb->port == port &&
+				    ve_accessible_strict(tb->owner_env, ve)) {
  					BUG_TRAP(!hlist_empty(&tb->owners));
  					if (tb->fastreuse >= 0)
  						goto next_port;
  					if (!__inet6_check_established(death_row,
 								       sk, port,
-								       &tw))
+								       &tw, ve))
  						goto ok;
  					goto next_port;
  				}
  			}
 
  			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
-						     head, port);
+						     head, port, ve);
  			if (!tb) {
  				spin_unlock(&head->lock);
  				break;
@@ -243,7 +255,7 @@ ok:
 		goto out;
  	}
 
- 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size, VEID(ve))];
  	tb   = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 
@@ -254,7 +266,7 @@ ok:
 	} else {
 		spin_unlock(&head->lock);
 		/* No definite answer... Walk to established hash table */
-		ret = __inet6_check_established(death_row, sk, snum, NULL);
+		ret = __inet6_check_established(death_row, sk, snum, NULL, ve);
 out:
 		local_bh_enable();
 		return ret;
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/ip6_fib.c linux-2.6.16.46-0.12-027test011/net/ipv6/ip6_fib.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/ip6_fib.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/ip6_fib.c	2007-08-28 17:35:32.000000000 +0400
@@ -1128,8 +1128,12 @@ static int fib6_age(struct rt6_info *rt,
 
 static DEFINE_SPINLOCK(fib6_gc_lock);
 
+LIST_HEAD(fib6_table_list);
+
 void fib6_run_gc(unsigned long dummy)
 {
+	struct fib6_table *tbl;
+
 	if (dummy != ~0UL) {
 		spin_lock_bh(&fib6_gc_lock);
 		gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
@@ -1147,7 +1151,11 @@ void fib6_run_gc(unsigned long dummy)
 
 	write_lock_bh(&rt6_lock);
 	ndisc_dst_gc(&gc_args.more);
-	fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
+	list_for_each_entry(tbl, &fib6_table_list, list) {	/* age every table on fib6_table_list instead of the single ip6_routing_table */
+		struct ve_struct *old_env = set_exec_env(tbl->owner_env);	/* run fib6_age in the env recorded as the table's owner */
+		fib6_clean_tree(&tbl->root, fib6_age, 0, NULL);
+		set_exec_env(old_env);
+	}
 	write_unlock_bh(&rt6_lock);
 
 	if (gc_args.more)
@@ -1163,7 +1171,7 @@ void __init fib6_init(void)
 {
 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
 					   sizeof(struct fib6_node),
-					   0, SLAB_HWCACHE_ALIGN,
+					   0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
 					   NULL, NULL);
 	if (!fib6_node_kmem)
 		panic("cannot create fib6_nodes cache");
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/ip6_flowlabel.c linux-2.6.16.46-0.12-027test011/net/ipv6/ip6_flowlabel.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/ip6_flowlabel.c	2007-08-24 19:28:08.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/ip6_flowlabel.c	2007-08-28 17:35:32.000000000 +0400
@@ -417,6 +417,9 @@ int ipv6_flowlabel_opt(struct sock *sk, 
 	struct ipv6_fl_socklist *sfl, **sflp;
 	struct ip6_flowlabel *fl;
 
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
 	if (optlen < sizeof(freq))
 		return -EINVAL;
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/ip6_output.c linux-2.6.16.46-0.12-027test011/net/ipv6/ip6_output.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/ip6_output.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/ip6_output.c	2007-08-28 17:35:32.000000000 +0400
@@ -319,7 +319,7 @@ int ip6_forward(struct sk_buff *skb)
 	struct ipv6hdr *hdr = skb->nh.ipv6h;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	
-	if (ipv6_devconf.forwarding == 0)
+	if (ve_ipv6_devconf.forwarding == 0)
 		goto error;
 
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
@@ -407,6 +407,20 @@ int ip6_forward(struct sk_buff *skb)
 		return -EMSGSIZE;
 	}
 
+	/*
+	 * Forwarding optimisation for VE traffic: a packet that
+	 * arrived on a VENET device (i.e. is leaving a VE) skips
+	 * the hop limit decrement, which also saves the skb_cow()
+	 * below. Packets heading into a VE are still decremented,
+	 * since such an skb is not cloned and needs no real copy.
+	 * Every packet path thus keeps at least one mandatory
+	 * decrement, which is sufficient to prevent routing
+	 * loops.
+	 */
+	hdr = skb->nh.ipv6h;
+	if (skb->dev->features & NETIF_F_VENET) /* src is VENET device */
+		goto no_ttl_decr;
+
 	if (skb_cow(skb, dst->dev->hard_header_len)) {
 		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
 		goto drop;
@@ -418,6 +432,7 @@ int ip6_forward(struct sk_buff *skb)
  
 	hdr->hop_limit--;
 
+no_ttl_decr:
 	IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
 	return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/mcast.c linux-2.6.16.46-0.12-027test011/net/ipv6/mcast.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/mcast.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/mcast.c	2007-08-28 17:35:33.000000000 +0400
@@ -156,7 +156,7 @@ static int ip6_mc_leave_src(struct sock 
 #define IGMP6_UNSOLICITED_IVAL	(10*HZ)
 #define MLD_QRV_DEFAULT		2
 
-#define MLD_V1_SEEN(idev) (ipv6_devconf.force_mld_version == 1 || \
+#define MLD_V1_SEEN(idev) (ve_ipv6_devconf.force_mld_version == 1 || \
 		(idev)->cnf.force_mld_version == 1 || \
 		((idev)->mc_v1_seen && \
 		time_before(jiffies, (idev)->mc_v1_seen)))
@@ -248,6 +248,7 @@ int ipv6_sock_mc_join(struct sock *sk, i
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ipv6_sock_mc_join);
 
 /*
  *	socket leave on multicast group
@@ -2168,15 +2169,18 @@ static void igmp6_leave_group(struct ifm
 static void mld_gq_timer_expire(unsigned long data)
 {
 	struct inet6_dev *idev = (struct inet6_dev *)data;
+	struct ve_struct *old_env = set_exec_env(idev->dev->owner_env);	/* run the timer body in the env of the device's owner; restored just before returning */
 
 	idev->mc_gq_running = 0;
 	mld_send_report(idev, NULL);
 	__in6_dev_put(idev);
+	set_exec_env(old_env);
 }
 
 static void mld_ifc_timer_expire(unsigned long data)
 {
 	struct inet6_dev *idev = (struct inet6_dev *)data;
+	struct ve_struct *old_env = set_exec_env(idev->dev->owner_env);	/* run the timer body in the env of the device's owner; restored just before returning */
 
 	mld_send_cr(idev);
 	if (idev->mc_ifc_count) {
@@ -2185,6 +2189,7 @@ static void mld_ifc_timer_expire(unsigne
 			mld_ifc_start_timer(idev, idev->mc_maxdelay);
 	}
 	__in6_dev_put(idev);
+	set_exec_env(old_env);
 }
 
 static void mld_ifc_event(struct inet6_dev *idev)
@@ -2199,6 +2204,7 @@ static void mld_ifc_event(struct inet6_d
 static void igmp6_timer_handler(unsigned long data)
 {
 	struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
+	struct ve_struct *old_env = set_exec_env(ma->idev->dev->owner_env);	/* send the report in the env of the owner of the address's device; restored after ma_put() */
 
 	if (MLD_V1_SEEN(ma->idev))
 		igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
@@ -2210,6 +2216,7 @@ static void igmp6_timer_handler(unsigned
 	ma->mca_flags &= ~MAF_TIMER_RUNNING;
 	spin_unlock(&ma->mca_lock);
 	ma_put(ma);
+	set_exec_env(old_env);
 }
 
 /* Device going down */
@@ -2327,6 +2334,8 @@ static inline struct ifmcaddr6 *igmp6_mc
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct inet6_dev *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in6_dev_get(state->dev);
 		if (!idev)
 			continue;
@@ -2357,6 +2366,8 @@ static struct ifmcaddr6 *igmp6_mc_get_ne
 			state->idev = NULL;
 			break;
 		}
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		state->idev = in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
@@ -2472,6 +2483,8 @@ static inline struct ip6_sf_list *igmp6_
 	     state->dev; 
 	     state->dev = state->dev->next) {
 		struct inet6_dev *idev;
+		if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+			continue;
 		idev = in6_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
@@ -2511,6 +2524,8 @@ static struct ip6_sf_list *igmp6_mcf_get
 				state->idev = NULL;
 				goto out;
 			}
+			if (unlikely(!ve_accessible_strict(state->dev->owner_env, get_exec_env())))
+				continue;
 			state->idev = in6_dev_get(state->dev);
 			if (!state->idev)
 				continue;
@@ -2653,8 +2668,8 @@ int __init igmp6_init(struct net_proto_f
 	np->hop_limit = 1;
 
 #ifdef CONFIG_PROC_FS
-	proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops);
-	proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
+	proc_glob_fops_create("net/igmp6", S_IRUGO, &igmp6_mc_seq_fops);
+	proc_glob_fops_create("net/mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
 #endif
 
 	return 0;
@@ -2666,7 +2681,7 @@ void igmp6_cleanup(void)
 	igmp6_socket = NULL; /* for safety */
 
 #ifdef CONFIG_PROC_FS
-	proc_net_remove("mcfilter6");
-	proc_net_remove("igmp6");
+	remove_proc_glob_entry("net/mcfilter6", NULL);
+	remove_proc_glob_entry("net/igmp6", NULL);
 #endif
 }
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/ndisc.c linux-2.6.16.46-0.12-027test011/net/ipv6/ndisc.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/ndisc.c	2007-08-24 19:28:21.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/ndisc.c	2007-08-28 17:35:32.000000000 +0400
@@ -124,7 +124,7 @@ static struct neigh_ops ndisc_direct_ops
 	.queue_xmit =		dev_queue_xmit,
 };
 
-struct neigh_table nd_tbl = {
+struct neigh_table global_nd_tbl = {
 	.family =	AF_INET6,
 	.entry_size =	sizeof(struct neighbour) + sizeof(struct in6_addr),
 	.key_len =	sizeof(struct in6_addr),
@@ -135,7 +135,7 @@ struct neigh_table nd_tbl = {
 	.proxy_redo =	pndisc_redo,
 	.id =		"ndisc_cache",
 	.parms = {
-		.tbl =			&nd_tbl,
+		.tbl =			&global_nd_tbl,
 		.base_reachable_time =	30 * HZ,
 		.retrans_time =	 1 * HZ,
 		.gc_staletime =	60 * HZ,
@@ -457,7 +457,9 @@ static void ndisc_send_na(struct net_dev
 			inc_opt = 0;
 	}
 
-	skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+	skb = sock_alloc_send_skb(sk,
+				  (MAX_HEADER + sizeof(struct ipv6hdr) +
+				   len + LL_RESERVED_SPACE(dev)),
 				  1, &err);
 
 	if (skb == NULL) {
@@ -545,7 +547,9 @@ void ndisc_send_ns(struct net_device *de
 	if (send_llinfo)
 		len += ndisc_opt_addr_space(dev);
 
-	skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+	skb = sock_alloc_send_skb(sk,
+				  (MAX_HEADER + sizeof(struct ipv6hdr) +
+				   len + LL_RESERVED_SPACE(dev)),
 				  1, &err);
 	if (skb == NULL) {
 		ND_PRINTK0(KERN_ERR
@@ -619,7 +623,9 @@ void ndisc_send_rs(struct net_device *de
 	if (dev->addr_len)
 		len += ndisc_opt_addr_space(dev);
 
-        skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+        skb = sock_alloc_send_skb(sk,
+				  (MAX_HEADER + sizeof(struct ipv6hdr) +
+				   len + LL_RESERVED_SPACE(dev)),
 				  1, &err);
 	if (skb == NULL) {
 		ND_PRINTK0(KERN_ERR
@@ -1379,7 +1385,9 @@ void ndisc_send_redirect(struct sk_buff 
 	rd_len &= ~0x7;
 	len += rd_len;
 
-	buff = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+	buff = sock_alloc_send_skb(sk,
+				   (MAX_HEADER + sizeof(struct ipv6hdr) +
+				    len + LL_RESERVED_SPACE(dev)),
 				   1, &err);
 	if (buff == NULL) {
 		ND_PRINTK0(KERN_ERR
@@ -1663,7 +1671,9 @@ int __init ndisc_init(struct net_proto_f
          * Initialize the neighbour table
          */
 	
-	neigh_table_init(&nd_tbl);
+	get_ve0()->ve_nd_tbl = &global_nd_tbl;
+	if (neigh_table_init(&nd_tbl))
+		panic("cannot initialize IPv6 NDISC tables\n");
 
 #ifdef CONFIG_SYSCTL
 	neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, 
@@ -1685,3 +1695,52 @@ void ndisc_cleanup(void)
 	sock_release(ndisc_socket);
 	ndisc_socket = NULL; /* For safety. */
 }
+
+#ifdef CONFIG_VE
+int ve_ndisc_init(struct ve_struct *ve)	/* allocate and initialise a separate neighbour table for this VE; returns 0 or a negative errno */
+{
+	struct ve_struct *old_env;
+	int err;
+
+	ve->ve_nd_tbl = kmalloc(sizeof(struct neigh_table), GFP_KERNEL);
+	if (ve->ve_nd_tbl == NULL)
+		return -ENOMEM;
+
+	*(ve->ve_nd_tbl) = global_nd_tbl;	/* use the global table as the template */
+	ve->ve_nd_tbl->parms.tbl = ve->ve_nd_tbl;	/* the copied back-pointer still referred to global_nd_tbl */
+	old_env = set_exec_env(ve);	/* the calls below run with ve as the exec env */
+	err = neigh_table_init(ve->ve_nd_tbl);
+	if (err)
+		goto out_free;
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH,	/* NOTE(review): nd_tbl presumably resolves to ve->ve_nd_tbl under the env set above - confirm; return value is ignored */
+			      "ipv6",
+			      &ndisc_ifinfo_sysctl_change,
+			      &ndisc_ifinfo_sysctl_strategy);
+#endif
+	err = 0;
+
+out:
+	set_exec_env(old_env);	/* common exit: always switch back to the caller's env */
+	return err;
+
+out_free:
+	kfree(ve->ve_nd_tbl);
+	ve->ve_nd_tbl = NULL;	/* lets ve_ndisc_fini() see that there is nothing to tear down */
+	goto out;
+}
+EXPORT_SYMBOL(ve_ndisc_init);
+
+void ve_ndisc_fini(struct ve_struct *ve)
+{
+	if (ve->ve_nd_tbl == NULL)	/* no table for this VE: nothing to undo */
+		return;
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_unregister(&ve->ve_nd_tbl->parms);
+#endif
+	neigh_table_clear(ve->ve_nd_tbl);
+	kfree(ve->ve_nd_tbl);
+	ve->ve_nd_tbl = NULL;	/* do not leave a dangling pointer in the VE */
+}
+EXPORT_SYMBOL(ve_ndisc_fini);
+#endif /* CONFIG_VE */
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6_queue.c linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6_queue.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6_queue.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6_queue.c	2007-08-28 17:35:32.000000000 +0400
@@ -540,8 +540,11 @@ ipq_rcv_sk(struct sock *sk, int len)
 	down(&ipqnl_sem);
 			
 	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
+		struct ve_struct *env;
 		skb = skb_dequeue(&sk->sk_receive_queue);
+		env = set_exec_env(skb->owner_env);
 		ipq_rcv_skb(skb);
+		(void)set_exec_env(env);
 		kfree_skb(skb);
 	}
 		
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6_tables.c linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6_tables.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6_tables.c	2007-08-24 19:28:08.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6_tables.c	2007-08-28 17:35:36.000000000 +0400
@@ -32,9 +32,11 @@
 #include <asm/semaphore.h>
 #include <linux/proc_fs.h>
 #include <linux/cpumask.h>
+#include <ub/ub_mem.h>
 
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter/x_tables.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -1150,9 +1152,14 @@ do_ip6t_set_ctl(struct sock *sk, int cmd
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET6].next)
+		return -ENOENT;
+#endif
+
 	switch (cmd) {
 	case IP6T_SO_SET_REPLACE:
 		ret = do_replace(user, len);
@@ -1175,9 +1182,14 @@ do_ip6t_get_ctl(struct sock *sk, int cmd
 {
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!capable(CAP_VE_NET_ADMIN))
 		return -EPERM;
 
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_xt_tables[AF_INET6].next)
+		return -ENOENT;
+#endif
+
 	switch (cmd) {
 	case IP6T_SO_GET_INFO: {
 		char name[IP6T_TABLE_MAXNAMELEN];
@@ -1273,18 +1285,18 @@ do_ip6t_get_ctl(struct sock *sk, int cmd
 	return ret;
 }
 
-int ip6t_register_table(struct xt_table *table,
+struct ip6t_table *ip6t_register_table(struct xt_table *table,
 			const struct ip6t_replace *repl)
 {
 	int ret;
 	struct xt_table_info *newinfo;
 	static struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+		= { 0, 0, 0, 0, { 0 }, { 0 }, { } };
 	void *loc_cpu_entry;
 
 	newinfo = xt_alloc_table_info(repl->size);
 	if (!newinfo)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* choose the copy on our node/cpu */
 	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
@@ -1297,27 +1309,29 @@ int ip6t_register_table(struct xt_table 
 			      repl->underflow);
 	if (ret != 0) {
 		xt_free_table_info(newinfo);
-		return ret;
+		return ERR_PTR(ret);
 	}
 
-	if (xt_register_table(table, &bootstrap, newinfo) != 0) {
+	table = virt_xt_register_table(table, &bootstrap, newinfo);
+	if (IS_ERR(table))
 		xt_free_table_info(newinfo);
-		return ret;
-	}
-
-	return 0;
+	return table;
 }
 
 void ip6t_unregister_table(struct xt_table *table)
 {
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
+	struct module *me;
 
-	private = xt_unregister_table(table);
+	me = table->me;
+	private = virt_xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+	if (private->number > private->initial_entries)
+		module_put(me);
 	xt_free_table_info(private);
 }
 
@@ -1407,36 +1421,78 @@ static struct ip6t_match icmp6_matchstru
 	.checkentry	= &icmp6_checkentry,
 };
 
+static int init_ip6tables(void)	/* xt_proto_init(AF_INET6) for the current execution environment */
+{
+#ifdef CONFIG_VE_IPTABLES
+	if (get_exec_env()->_xt_tables[AF_INET6].next != NULL)	/* non-NULL .next is treated in this file as "already initialised" */
+		return -EEXIST;
+#endif
+
+	return xt_proto_init(AF_INET6);
+}
+
+static void fini_ip6tables(void)	/* counterpart of init_ip6tables() for the current execution environment */
+{
+#ifdef CONFIG_VE_IPTABLES
+	get_exec_env()->_xt_tables[AF_INET6].next = NULL;	/* the .next NULL tests in this file then see "not initialised" */
+#endif
+	xt_proto_fini(AF_INET6);
+}
+
 static int __init init(void)
 {
 	int ret;
 
-	xt_proto_init(AF_INET6);
+	ret = init_ip6tables();
+	if (ret)
+		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
-	xt_register_target(AF_INET6, &ip6t_standard_target);
-	xt_register_target(AF_INET6, &ip6t_error_target);
-	xt_register_match(AF_INET6, &icmp6_matchstruct);
+	ret = xt_register_target(AF_INET6, &ip6t_standard_target);
+	if (ret < 0)
+		goto err2;
+	ret = xt_register_target(AF_INET6, &ip6t_error_target);
+	if (ret < 0)
+		goto err3;
+	ret = xt_register_match(AF_INET6, &icmp6_matchstruct);
+	if (ret < 0)
+		goto err4;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ip6t_sockopts);
 	if (ret < 0) {
 		duprintf("Unable to register sockopts.\n");
-		xt_proto_fini(AF_INET6);
-		return ret;
+		goto err5;
 	}
 
+	KSYMRESOLVE(init_ip6tables);
+	KSYMRESOLVE(fini_ip6tables);
+	KSYMMODRESOLVE(ip6_tables);
 	printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
+
+err5:
+	xt_unregister_match(AF_INET6, &icmp6_matchstruct);
+err4:
+	xt_unregister_target(AF_INET6, &ip6t_error_target);
+err3:
+	xt_unregister_target(AF_INET6, &ip6t_standard_target);
+err2:
+	fini_ip6tables();
+err1:
+	return ret;
 }
 
 static void __exit fini(void)
 {
+	KSYMMODUNRESOLVE(ip6_tables);
+	KSYMUNRESOLVE(init_ip6tables);
+	KSYMUNRESOLVE(fini_ip6tables);
 	nf_unregister_sockopt(&ip6t_sockopts);
 	xt_unregister_match(AF_INET6, &icmp6_matchstruct);
 	xt_unregister_target(AF_INET6, &ip6t_error_target);
 	xt_unregister_target(AF_INET6, &ip6t_standard_target);
-	xt_proto_fini(AF_INET6);
+	fini_ip6tables();
 }
 
 /*
@@ -1522,5 +1578,5 @@ EXPORT_SYMBOL(ip6t_ext_hdr);
 EXPORT_SYMBOL(ipv6_find_hdr);
 EXPORT_SYMBOL(ip6_masked_addrcmp);
 
-module_init(init);
+subsys_initcall(init);
 module_exit(fini);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6t_REJECT.c linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6t_REJECT.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6t_REJECT.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6t_REJECT.c	2007-08-28 17:35:32.000000000 +0400
@@ -26,6 +26,7 @@
 #include <net/ip6_checksum.h>
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
+#include <linux/nfcalls.h>
 #include <net/flow.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_REJECT.h>
@@ -268,17 +269,39 @@ static struct ip6t_target ip6t_reject_re
 	.me		= THIS_MODULE
 };
 
-static int __init init(void)
+int init_ip6table_REJECT(void)
 {
 	if (ip6t_register_target(&ip6t_reject_reg))
 		return -EINVAL;
 	return 0;
 }
 
-static void __exit fini(void)
+void fini_ip6table_REJECT(void)
 {
 	ip6t_unregister_target(&ip6t_reject_reg);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_ip6table_REJECT();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_ip6table_REJECT);
+	KSYMRESOLVE(fini_ip6table_REJECT);
+	KSYMMODRESOLVE(ip6t_REJECT);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip6t_REJECT);
+	KSYMUNRESOLVE(init_ip6table_REJECT);
+	KSYMUNRESOLVE(fini_ip6table_REJECT);
+	fini_ip6table_REJECT();
+}
+
 module_init(init);
 module_exit(fini);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6t_multiport.c linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6t_multiport.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6t_multiport.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6t_multiport.c	2007-08-28 17:35:32.000000000 +0400
@@ -14,6 +14,7 @@
 #include <linux/udp.h>
 #include <linux/skbuff.h>
 #include <linux/in.h>
+#include <linux/nfcalls.h>
 
 #include <linux/netfilter_ipv6/ip6t_multiport.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
@@ -112,15 +113,37 @@ static struct ip6t_match multiport_match
 	.me		= THIS_MODULE,
 };
 
-static int __init init(void)
+int init_ip6table_multiport(void)
 {
 	return ip6t_register_match(&multiport_match);
 }
 
-static void __exit fini(void)
+void fini_ip6table_multiport(void)
 {
 	ip6t_unregister_match(&multiport_match);
 }
 
+static int __init init(void)
+{
+	int err;
+
+	err = init_ip6table_multiport();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_ip6table_multiport);
+	KSYMRESOLVE(fini_ip6table_multiport);
+	KSYMMODRESOLVE(ip6t_multiport);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip6t_multiport);
+	KSYMUNRESOLVE(init_ip6table_multiport);
+	KSYMUNRESOLVE(fini_ip6table_multiport);
+	fini_ip6table_multiport();
+}
+
 module_init(init);
 module_exit(fini);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6table_filter.c linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6table_filter.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6table_filter.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6table_filter.c	2007-08-28 17:35:32.000000000 +0400
@@ -11,12 +11,20 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/nfcalls.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("ip6tables filter table");
 
+#ifdef CONFIG_VE_IPTABLES	/* ve_packet_filter: table pointer handed to ip6t_do_table() */
+#include <linux/sched.h>
+#define ve_packet_filter	(get_exec_env()->_ve_ip6t_filter_pf)
+#else
+#define	ve_packet_filter	(&packet_filter)
+#endif
+
 #define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT))
 
 /* Standard entry. */
@@ -43,7 +51,7 @@ static struct
 	struct ip6t_replace repl;
 	struct ip6t_standard entries[3];
 	struct ip6t_error term;
-} initial_table __initdata
+} initial_table
 = { { "filter", FILTER_VALID_HOOKS, 4,
       sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
       { [NF_IP6_LOCAL_IN] = 0,
@@ -108,7 +116,7 @@ ip6t_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ip6t_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static unsigned int
@@ -128,7 +136,7 @@ ip6t_local_out_hook(unsigned int hook,
 	}
 #endif
 
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ip6t_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
 }
 
 static struct nf_hook_ops ip6t_ops[] = {
@@ -159,22 +167,19 @@ static struct nf_hook_ops ip6t_ops[] = {
 static int forward = NF_ACCEPT;
 module_param(forward, bool, 0000);
 
-static int __init init(void)
+int init_ip6table_filter(void)
 {
 	int ret;
-
-	if (forward < 0 || forward > NF_MAX_VERDICT) {
-		printk("iptables forward must be 0 or 1\n");
-		return -EINVAL;
-	}
-
-	/* Entry 1 is the FORWARD hook */
-	initial_table.entries[1].target.verdict = -forward - 1;
+	struct ip6t_table *tmp_filter;
 
 	/* Register table */
-	ret = ip6t_register_table(&packet_filter, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_filter = ip6t_register_table(&packet_filter,
+			&initial_table.repl);
+	if (IS_ERR(tmp_filter))
+		return PTR_ERR(tmp_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = tmp_filter;
+#endif
 
 	/* Register hooks */
 	ret = nf_register_hook(&ip6t_ops[0]);
@@ -196,19 +201,55 @@ static int __init init(void)
  cleanup_hook0:
 	nf_unregister_hook(&ip6t_ops[0]);
  cleanup_table:
-	ip6t_unregister_table(&packet_filter);
+	ip6t_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
 
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_ip6table_filter(void)
 {
 	unsigned int i;
 
 	for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
 		nf_unregister_hook(&ip6t_ops[i]);
 
-	ip6t_unregister_table(&packet_filter);
+	ip6t_unregister_table(ve_packet_filter);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_filter = NULL;
+#endif
+}
+
+static int __init init(void)
+{
+	int err;
+
+	if (forward < 0 || forward > NF_MAX_VERDICT) {
+		printk("iptables forward must be 0 or 1\n");
+		return -EINVAL;
+	}
+
+	/* Entry 1 is the FORWARD hook */
+	initial_table.entries[1].target.verdict = -forward - 1;
+
+	err = init_ip6table_filter();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_ip6table_filter);
+	KSYMRESOLVE(fini_ip6table_filter);
+	KSYMMODRESOLVE(ip6table_filter);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip6table_filter);
+	KSYMUNRESOLVE(init_ip6table_filter);
+	KSYMUNRESOLVE(fini_ip6table_filter);
+	fini_ip6table_filter();
 }
 
 module_init(init);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6table_mangle.c linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6table_mangle.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6table_mangle.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6table_mangle.c	2007-08-28 17:35:32.000000000 +0400
@@ -12,6 +12,7 @@
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/nfcalls.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -53,7 +54,7 @@ static struct
 	struct ip6t_replace repl;
 	struct ip6t_standard entries[5];
 	struct ip6t_error term;
-} initial_table __initdata
+} initial_table
 = { { "mangle", MANGLE_VALID_HOOKS, 6,
       sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error),
       { [NF_IP6_PRE_ROUTING] 	= 0,
@@ -130,6 +131,13 @@ static struct ip6t_table packet_mangler 
 	.af		= AF_INET6,
 };
 
+#ifdef CONFIG_VE_IPTABLES	/* ve_packet_mangler: table pointer handed to ip6t_do_table() */
+#include <linux/sched.h>
+#define ve_packet_mangler	(get_exec_env()->_ip6t_mangle_table)
+#else
+#define ve_packet_mangler	(&packet_mangler)
+#endif
+
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6t_route_hook(unsigned int hook,
@@ -138,7 +146,7 @@ ip6t_route_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	return ip6t_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
 }
 
 static unsigned int
@@ -174,7 +182,7 @@ ip6t_local_hook(unsigned int hook,
 	/* flowlabel and prio (includes version, which shouldn't change either */
 	flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
 
-	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	ret = ip6t_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
 
 	if (ret != NF_DROP && ret != NF_STOLEN 
 		&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
@@ -228,14 +236,19 @@ static struct nf_hook_ops ip6t_ops[] = {
 	},
 };
 
-static int __init init(void)
+int init_ip6table_mangle(void)
 {
 	int ret;
+	struct ip6t_table *tmp_mangler;
 
 	/* Register table */
-	ret = ip6t_register_table(&packet_mangler, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp_mangler = ip6t_register_table(&packet_mangler,
+			&initial_table.repl);
+	if (IS_ERR(tmp_mangler))
+		return PTR_ERR(tmp_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = tmp_mangler;
+#endif
 
 	/* Register hooks */
 	ret = nf_register_hook(&ip6t_ops[0]);
@@ -269,19 +282,47 @@ static int __init init(void)
  cleanup_hook0:
 	nf_unregister_hook(&ip6t_ops[0]);
  cleanup_table:
-	ip6t_unregister_table(&packet_mangler);
+	ip6t_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
 
 	return ret;
 }
 
-static void __exit fini(void)
+void fini_ip6table_mangle(void)
 {
 	unsigned int i;
 
 	for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
 		nf_unregister_hook(&ip6t_ops[i]);
 
-	ip6t_unregister_table(&packet_mangler);
+	ip6t_unregister_table(ve_packet_mangler);
+#ifdef CONFIG_VE_IPTABLES
+	ve_packet_mangler = NULL;
+#endif
+}
+
+static int __init init(void)
+{
+	int err;
+
+	err = init_ip6table_mangle();
+	if (err < 0)
+		return err;
+
+	KSYMRESOLVE(init_ip6table_mangle);
+	KSYMRESOLVE(fini_ip6table_mangle);
+	KSYMMODRESOLVE(ip6table_mangle);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	KSYMMODUNRESOLVE(ip6table_mangle);
+	KSYMUNRESOLVE(init_ip6table_mangle);
+	KSYMUNRESOLVE(fini_ip6table_mangle);
+	fini_ip6table_mangle();
 }
 
 module_init(init);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6table_raw.c linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6table_raw.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/netfilter/ip6table_raw.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/netfilter/ip6table_raw.c	2007-08-28 17:35:32.000000000 +0400
@@ -145,11 +145,12 @@ static struct nf_hook_ops ip6t_ops[] = {
 static int __init init(void)
 {
 	int ret;
+	struct ip6t_table *tmp;
 
 	/* Register table */
-	ret = ip6t_register_table(&packet_raw, &initial_table.repl);
-	if (ret < 0)
-		return ret;
+	tmp = ip6t_register_table(&packet_raw, &initial_table.repl);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
 
 	/* Register hooks */
 	ret = nf_register_hook(&ip6t_ops[0]);
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/proc.c linux-2.6.16.46-0.12-027test011/net/ipv6/proc.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/proc.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/proc.c	2007-08-28 17:35:32.000000000 +0400
@@ -25,13 +25,18 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/stddef.h>
+#include <linux/ve.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_VE
+#define proc_net_devsnmp6	(get_exec_env()->_proc_net_devsnmp6)
+#else
 static struct proc_dir_entry *proc_net_devsnmp6;
+#endif
 
 static int fold_prot_inuse(struct proto *proto)
 {
@@ -164,9 +169,9 @@ static int snmp6_seq_show(struct seq_fil
 		seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
 		snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list);
 	} else {
-		snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
-		snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
-		snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list);
+		snmp6_seq_show_item(seq, (void **)ve_ipv6_statistics, snmp6_ipstats_list);
+		snmp6_seq_show_item(seq, (void **)ve_icmpv6_statistics, snmp6_icmp6_list);
+		snmp6_seq_show_item(seq, (void **)ve_udp_stats_in6, snmp6_udp6_list);
 	}
 	return 0;
 }
@@ -229,15 +234,27 @@ int snmp6_unregister_dev(struct inet6_de
 	return 0;
 }
 
+int ve_snmp_proc_init(void)	/* create "dev_snmp6" under proc_net; returns 0 or -ENOMEM */
+{
+	proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
+	return proc_net_devsnmp6 ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(ve_snmp_proc_init);
+
+void ve_snmp_proc_fini(void)
+{
+	proc_net_remove("dev_snmp6");
+}
+EXPORT_SYMBOL(ve_snmp_proc_fini);
+
 int __init ipv6_misc_proc_init(void)
 {
 	int rc = 0;
 
-	if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops))
+	if (!proc_glob_fops_create("net/snmp6", S_IRUGO, &snmp6_seq_fops))
 		goto proc_snmp6_fail;
 
-	proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
-	if (!proc_net_devsnmp6)
+	if (ve_snmp_proc_init())
 		goto proc_dev_snmp6_fail;
 
 	if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops))
@@ -246,9 +263,9 @@ out:
 	return rc;
 
 proc_sockstat6_fail:
-	proc_net_remove("dev_snmp6");
+	ve_snmp_proc_fini();
 proc_dev_snmp6_fail:
-	proc_net_remove("snmp6");
+	remove_proc_glob_entry("net/snmp6", NULL);
 proc_snmp6_fail:
 	rc = -ENOMEM;
 	goto out;
@@ -257,7 +274,7 @@ proc_snmp6_fail:
 void ipv6_misc_proc_exit(void)
 {
 	proc_net_remove("sockstat6");
-	proc_net_remove("dev_snmp6");
-	proc_net_remove("snmp6");
+	ve_snmp_proc_fini();
+	remove_proc_glob_entry("net/snmp6", NULL);	/* entry was created with proc_glob_fops_create("net/snmp6") */
 }
 
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/raw.c linux-2.6.16.46-0.12-027test011/net/ipv6/raw.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/raw.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/raw.c	2007-08-28 17:35:32.000000000 +0400
@@ -99,6 +99,10 @@ struct sock *__raw_v6_lookup(struct sock
 			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
 				continue;
 
+			if (!ve_accessible_strict(sk->owner_env,
+						get_exec_env()))
+				continue;
+
 			if (!ipv6_addr_any(&np->rcv_saddr)) {
 				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
 					goto found;
@@ -1046,8 +1050,13 @@ static struct sock *raw6_get_next(struct
 	do {
 		sk = sk_next(sk);
 try_again:
-		;
-	} while (sk && sk->sk_family != PF_INET6);
+		if (!sk)
+			break;
+		if (sk->sk_family != PF_INET6)
+			continue;
+		if (ve_accessible(sk->owner_env, get_exec_env()))
+			break;
+	} while (1);
 
 	if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) {
 		sk = sk_head(&raw_v6_htable[state->bucket]);
@@ -1166,13 +1175,13 @@ static struct file_operations raw6_seq_f
 
 int __init raw6_proc_init(void)
 {
-	if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops))
+	if (!proc_glob_fops_create("net/raw6", S_IRUGO, &raw6_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
 
 void raw6_proc_exit(void)
 {
-	proc_net_remove("raw6");
+	remove_proc_glob_entry("net/raw6", NULL);
 }
 #endif	/* CONFIG_PROC_FS */
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/reassembly.c linux-2.6.16.46-0.12-027test011/net/ipv6/reassembly.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/reassembly.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/reassembly.c	2007-08-28 17:35:32.000000000 +0400
@@ -95,6 +95,7 @@ struct frag_queue
 #define FIRST_IN		2
 #define LAST_IN			1
 	__u16			nhoffset;
+	struct ve_struct *owner_env;
 };
 
 /* Hash table. */
@@ -288,6 +289,9 @@ static void ip6_evictor(void)
 static void ip6_frag_expire(unsigned long data)
 {
 	struct frag_queue *fq = (struct frag_queue *) data;
+	struct ve_struct *envid;
+
+	envid = set_exec_env(fq->owner_env);
 
 	spin_lock(&fq->lock);
 
@@ -318,6 +322,8 @@ static void ip6_frag_expire(unsigned lon
 out:
 	spin_unlock(&fq->lock);
 	fq_put(fq, NULL);
+
+	(void)set_exec_env(envid);
 }
 
 /* Creation primitives. */
@@ -336,7 +342,8 @@ static struct frag_queue *ip6_frag_inter
 	hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
 		if (fq->id == fq_in->id && 
 		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
-		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
+		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr) &&
+		    fq->owner_env == get_exec_env()) {
 			atomic_inc(&fq->refcnt);
 			write_unlock(&ip6_frag_lock);
 			fq_in->last_in |= COMPLETE;
@@ -380,6 +387,8 @@ ip6_frag_create(unsigned int hash, u32 i
 	spin_lock_init(&fq->lock);
 	atomic_set(&fq->refcnt, 1);
 
+	fq->owner_env = get_exec_env();
+
 	return ip6_frag_intern(hash, fq);
 
 oom:
@@ -398,7 +407,8 @@ fq_find(u32 id, struct in6_addr *src, st
 	hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
 		if (fq->id == id && 
 		    ipv6_addr_equal(src, &fq->saddr) &&
-		    ipv6_addr_equal(dst, &fq->daddr)) {
+		    ipv6_addr_equal(dst, &fq->daddr) &&
+		    fq->owner_env == get_exec_env()) {
 			atomic_inc(&fq->refcnt);
 			read_unlock(&ip6_frag_lock);
 			return fq;
@@ -727,6 +737,9 @@ static int ipv6_frag_rcv(struct sk_buff 
 		    fq->meat == fq->len)
 			ret = ip6_frag_reasm(fq, skbp, dev);
 
+		if (ret > 0)
+			(*skbp)->owner_env = skb->owner_env;
+
 		spin_unlock(&fq->lock);
 		fq_put(fq, NULL);
 		return ret;
@@ -737,6 +750,48 @@ static int ipv6_frag_rcv(struct sk_buff 
 	return -1;
 }
 
+#ifdef CONFIG_VE
+/* XXX: fq_kill() every not-yet-COMPLETE fragment queue whose owner_env passes ve_accessible_strict() against @envid */
+void ip6_frag_cleanup(struct ve_struct *envid)
+{
+	int i, progress;
+
+	local_bh_disable();
+	do {
+		progress = 0;
+		for (i = 0; i < IP6Q_HASHSZ; i++) {
+			struct frag_queue *fq;
+			struct hlist_node *p, *n;
+
+			if (hlist_empty(&ip6_frag_hash[i]))	/* checked without ip6_frag_lock held */
+				continue;
+inner_restart:
+			read_lock(&ip6_frag_lock);
+			hlist_for_each_entry_safe(fq, p, n,
+					&ip6_frag_hash[i], list) {
+				if (!ve_accessible_strict(fq->owner_env, envid))
+					continue;
+				atomic_inc(&fq->refcnt);	/* hold fq while ip6_frag_lock is dropped */
+				read_unlock(&ip6_frag_lock);
+
+				spin_lock(&fq->lock);
+				if (!(fq->last_in&COMPLETE))
+					fq_kill(fq);
+				spin_unlock(&fq->lock);
+
+				fq_put(fq, NULL);
+				progress = 1;
+				goto inner_restart;	/* the lock was dropped, so rescan this bucket from its head */
+			}
+			read_unlock(&ip6_frag_lock);
+		}
+	} while(progress);	/* repeat whole-table passes until one pass handles no queue */
+	local_bh_enable();
+}
+EXPORT_SYMBOL(ip6_frag_cleanup);
+#endif
+
+
 static struct inet6_protocol frag_protocol =
 {
 	.handler	=	ipv6_frag_rcv,
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/route.c linux-2.6.16.46-0.12-027test011/net/ipv6/route.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/route.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/route.c	2007-08-28 17:35:32.000000000 +0400
@@ -52,7 +52,6 @@
 #include <net/addrconf.h>
 #include <net/tcp.h>
 #include <linux/rtnetlink.h>
-#include <net/dst.h>
 #include <net/xfrm.h>
 
 #include <asm/uaccess.h>
@@ -113,7 +112,6 @@ struct rt6_info ip6_null_entry = {
 		.dst = {
 			.__refcnt	= ATOMIC_INIT(1),
 			.__use		= 1,
-			.dev		= &loopback_dev,
 			.obsolete	= -1,
 			.error		= -ENETUNREACH,
 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
@@ -128,11 +126,19 @@ struct rt6_info ip6_null_entry = {
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
-struct fib6_node ip6_routing_table = {
-	.leaf		= &ip6_null_entry,
-	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+struct fib6_table global_fib6_table = {
+	.root = {
+		.leaf		= &ip6_null_entry,
+		.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+	}
 };
 
+#ifdef CONFIG_VE	/* ip6_routing_table: root node of the fib6 table in use */
+#define ip6_routing_table (get_exec_env()->_fib6_table->root)
+#else
+#define ip6_routing_table (global_fib6_table.root)
+#endif
+
 /* Protects all the ip6 fib */
 
 DEFINE_RWLOCK(rt6_lock);
@@ -778,7 +784,7 @@ static int ipv6_get_mtu(struct net_devic
 
 int ipv6_get_hoplimit(struct net_device *dev)
 {
-	int hoplimit = ipv6_devconf.hop_limit;
+	int hoplimit = ve_ipv6_devconf.hop_limit;
 	struct inet6_dev *idev;
 
 	idev = in6_dev_get(dev);
@@ -1421,10 +1427,12 @@ struct rt6_info *addrconf_dst_alloc(stru
 		rt->rt6i_flags |= RTF_ANYCAST;
 	else
 		rt->rt6i_flags |= RTF_LOCAL;
-	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
-	if (rt->rt6i_nexthop == NULL) {
+	rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, rt->rt6i_dev);
+	if (IS_ERR(rt->rt6i_nexthop)) {
+		void *err = rt->rt6i_nexthop;
+		rt->rt6i_nexthop = NULL;
 		dst_free((struct dst_entry *) rt);
-		return ERR_PTR(-ENOMEM);
+		return err;
 	}
 
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
@@ -1640,8 +1648,12 @@ static int rt6_fill_node(struct sk_buff 
 		goto rtattr_failure;
 	if (rt->u.dst.neighbour)
 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
-	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
+	if (rt->u.dst.dev) {
+		struct net_device *odev = rt->rt6i_dev;
+		if (rt == &ip6_null_entry)
+			odev = &loopback_dev;
+		RTA_PUT(skb, RTA_OIF, sizeof(int), &odev->ifindex);
+	}
 	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
 	if (rt->rt6i_expires)
@@ -2110,23 +2122,31 @@ void __init ip6_route_init(void)
 	if (!ip6_dst_ops.kmem_cachep)
 		panic("cannot create ip6_dst_cache");
 
+#ifdef CONFIG_VE
+	global_fib6_table.owner_env = get_ve0();
+	get_ve0()->_fib6_table = &global_fib6_table;
+#endif
+	list_add(&global_fib6_table.list, &fib6_table_list);
 	fib6_init();
 #ifdef 	CONFIG_PROC_FS
-	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
-	if (p)
+	p = create_proc_glob_entry("net/ipv6_route", 0, NULL);
+	if (p) {
 		p->owner = THIS_MODULE;
+		p->get_info = rt6_proc_info;
+	}
 
 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
 #endif
 #ifdef CONFIG_XFRM
 	xfrm6_init();
 #endif
+	ip6_null_entry.u.dst.dev = &loopback_dev;
 }
 
 void ip6_route_cleanup(void)
 {
 #ifdef CONFIG_PROC_FS
-	proc_net_remove("ipv6_route");
+	remove_proc_glob_entry("net/ipv6_route", NULL);
 	proc_net_remove("rt6_stats");
 #endif
 #ifdef CONFIG_XFRM
@@ -2136,3 +2156,50 @@ void ip6_route_cleanup(void)
 	fib6_gc_cleanup();
 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
 }
+
+#ifdef CONFIG_VE
+/*
+ * Allocate the IPv6 routing table of @ve, set its root leaf to
+ * ip6_null_entry and link it into fib6_table_list under rt6_lock.
+ * @ve is the execution environment while this runs; the caller's
+ * environment is restored before returning.
+ * Returns 0, or -ENOMEM if the table cannot be allocated.
+ */
+int init_ve_route6(struct ve_struct *ve)
+{
+	struct ve_struct *old_env = set_exec_env(ve);
+	ve->_fib6_table = kzalloc(sizeof(struct fib6_table), GFP_KERNEL_UBC);
+	if (ve->_fib6_table) {
+		ve->_fib6_table->owner_env = ve;
+		ve->_fib6_table->root.leaf = &ip6_null_entry;
+		ve->_fib6_table->root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+		write_lock_bh(&rt6_lock);
+		list_add(&ve->_fib6_table->list, &fib6_table_list);
+		write_unlock_bh(&rt6_lock);
+	}
+	set_exec_env(old_env);
+	return ve->_fib6_table ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(init_ve_route6);
+
+/*
+ * Release the IPv6 routing table of @ve, if it has one: call
+ * rt6_ifdown(NULL) with @ve as the execution environment, unlink the
+ * table from fib6_table_list under rt6_lock and free it.
+ */
+void fini_ve_route6(struct ve_struct *ve)
+{
+	struct ve_struct *old_env = set_exec_env(ve);
+
+	if (ve->_fib6_table) {
+		rt6_ifdown(NULL);
+		write_lock_bh(&rt6_lock);
+		list_del(&ve->_fib6_table->list);
+		write_unlock_bh(&rt6_lock);
+		kfree(ve->_fib6_table);
+		/* No dangling pointer: a repeated call must not free twice. */
+		ve->_fib6_table = NULL;
+	}
+	set_exec_env(old_env);
+}
+EXPORT_SYMBOL(fini_ve_route6);
+#endif
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/tcp_ipv6.c linux-2.6.16.46-0.12-027test011/net/ipv6/tcp_ipv6.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/tcp_ipv6.c	2007-08-24 19:28:15.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/tcp_ipv6.c	2007-08-28 17:35:33.000000000 +0400
@@ -62,6 +62,8 @@
 #include <net/dsfield.h>
 #include <net/timewait_sock.h>
 
+#include <ub/ub_tcp.h>
+
 #include <asm/uaccess.h>
 
 #include <linux/proc_fs.h>
@@ -77,7 +79,7 @@ static void	tcp_v6_send_check(struct soc
 
 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
 
-static struct inet_connection_sock_af_ops ipv6_mapped;
+struct inet_connection_sock_af_ops ipv6_mapped;
 static struct inet_connection_sock_af_ops ipv6_specific;
 
 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
@@ -1041,6 +1043,8 @@ static int tcp_v6_do_rcv(struct sock *sk
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp;
 	struct sk_buff *opt_skb = NULL;
+	struct user_beancounter *ub;
+
 
 	/* Imagine: socket is IPv6. IPv4 packet arrives,
 	   goes to IPv4 receive handler and backlogged.
@@ -1053,6 +1057,8 @@ static int tcp_v6_do_rcv(struct sock *sk
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_do_rcv(sk, skb);
 
+	ub = set_exec_ub(sock_bc(sk)->ub);
+
 	if (sk_filter(sk, skb, 0))
 		goto discard;
 
@@ -1084,7 +1090,7 @@ static int tcp_v6_do_rcv(struct sock *sk
 		TCP_CHECK_TIMER(sk);
 		if (opt_skb)
 			goto ipv6_pktoptions;
-		return 0;
+		goto restore_context;
 	}
 
 	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
@@ -1105,7 +1111,7 @@ static int tcp_v6_do_rcv(struct sock *sk
 				goto reset;
 			if (opt_skb)
 				__kfree_skb(opt_skb);
-			return 0;
+			goto restore_context;
 		}
 	}
 
@@ -1115,6 +1121,9 @@ static int tcp_v6_do_rcv(struct sock *sk
 	TCP_CHECK_TIMER(sk);
 	if (opt_skb)
 		goto ipv6_pktoptions;
+
+restore_context:
+	(void)set_exec_ub(ub);
 	return 0;
 
 reset:
@@ -1123,7 +1132,7 @@ discard:
 	if (opt_skb)
 		__kfree_skb(opt_skb);
 	kfree_skb(skb);
-	return 0;
+	goto restore_context;
 csum_err:
 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
 	goto discard;
@@ -1155,7 +1164,7 @@ ipv6_pktoptions:
 
 	if (opt_skb)
 		kfree_skb(opt_skb);
-	return 0;
+	goto restore_context;
 }
 
 static int tcp_v6_rcv(struct sk_buff **pskb)
@@ -1324,7 +1333,7 @@ static struct inet_connection_sock_af_op
  *	TCP over IPv4 via INET6 API
  */
 
-static struct inet_connection_sock_af_ops ipv6_mapped = {
+struct inet_connection_sock_af_ops ipv6_mapped = {
 	.queue_xmit	=	ip_queue_xmit,
 	.send_check	=	tcp_v4_send_check,
 	.rebuild_header	=	inet_sk_rebuild_header,
@@ -1338,6 +1347,7 @@ static struct inet_connection_sock_af_op
 	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
 	.sockaddr_len	=	sizeof(struct sockaddr_in6)
 };
+EXPORT_SYMBOL_GPL(ipv6_mapped);
 
 
 
@@ -1544,7 +1554,7 @@ out:
 static struct file_operations tcp6_seq_fops;
 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "tcp6",
+	.name		= "net/tcp6",
 	.family		= AF_INET6,
 	.seq_start	= tcp_seq_start,
 	.seq_next	= tcp_seq_next,
diff -upr linux-2.6.16.46-0.12.orig/net/ipv6/udp.c linux-2.6.16.46-0.12-027test011/net/ipv6/udp.c
--- linux-2.6.16.46-0.12.orig/net/ipv6/udp.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/ipv6/udp.c	2007-08-28 17:35:32.000000000 +0400
@@ -69,7 +69,9 @@ static int udp_v6_get_port(struct sock *
 {
 	struct sock *sk2;
 	struct hlist_node *node;
+	struct ve_struct *env;
 
+	env = sk->owner_env;
 	write_lock_bh(&udp_hash_lock);
 	if (snum == 0) {
 		int best_size_so_far, best, result, i;
@@ -83,7 +85,7 @@ static int udp_v6_get_port(struct sock *
 			int size;
 			struct hlist_head *list;
 
-			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+			list = &udp_hash[udp_hashfn(result, VEID(env))];
 			if (hlist_empty(list)) {
 				if (result > sysctl_local_port_range[1])
 					result = sysctl_local_port_range[0] +
@@ -105,7 +107,7 @@ static int udp_v6_get_port(struct sock *
 				result = sysctl_local_port_range[0]
 					+ ((result - sysctl_local_port_range[0]) &
 					   (UDP_HTABLE_SIZE - 1));
-			if (!udp_lport_inuse(result))
+			if (!udp_lport_inuse(result, env))
 				break;
 		}
 		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -114,9 +116,10 @@ gotit:
 		udp_port_rover = snum = result;
 	} else {
 		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
+			    &udp_hash[udp_hashfn(snum, VEID(env))]) {
 			if (inet_sk(sk2)->num == snum &&
 			    sk2 != sk &&
+			    ve_accessible_strict(sk2->owner_env, env) &&
 			    (!sk2->sk_bound_dev_if ||
 			     !sk->sk_bound_dev_if ||
 			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
@@ -128,7 +131,7 @@ gotit:
 
 	inet_sk(sk)->num = snum;
 	if (sk_unhashed(sk)) {
-		sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
+		sk_add_node(sk, &udp_hash[udp_hashfn(snum, VEID(env))]);
 		sock_prot_inc_use(sk->sk_prot);
 	}
 	write_unlock_bh(&udp_hash_lock);
@@ -161,12 +164,15 @@ static struct sock *udp_v6_lookup(struct
 	struct hlist_node *node;
 	unsigned short hnum = ntohs(dport);
 	int badness = -1;
+	struct ve_struct *env;
 
  	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+	env = get_exec_env();
+	sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (inet->num == hnum && sk->sk_family == PF_INET6) {
+		if (inet->num == hnum && sk->sk_family == PF_INET6 &&
+				ve_accessible_strict(sk->owner_env, env)) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 			int score = 0;
 			if (inet->dport) {
@@ -414,7 +420,8 @@ static void udpv6_mcast_deliver(struct u
 	int dif;
 
 	read_lock(&udp_hash_lock);
-	sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest),
+				VEID(skb->owner_env))]);
 	dif = inet6_iif(skb);
 	sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (!sk) {
@@ -1017,7 +1024,7 @@ static int udp6_seq_show(struct seq_file
 static struct file_operations udp6_seq_fops;
 static struct udp_seq_afinfo udp6_seq_afinfo = {
 	.owner		= THIS_MODULE,
-	.name		= "udp6",
+	.name		= "net/udp6",
 	.family		= AF_INET6,
 	.seq_show	= udp6_seq_show,
 	.seq_fops	= &udp6_seq_fops,
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/core.c linux-2.6.16.46-0.12-027test011/net/netfilter/core.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/core.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/core.c	2007-08-28 17:35:32.000000000 +0400
@@ -32,16 +32,35 @@
  * of skbuffs queued for userspace, and not deregister a hook unless
  * this is zero, but that sucks.  Now, we simply check when the
  * packets come back: if the hook is gone, the packet is discarded. */
+static DEFINE_SPINLOCK(nf_hook_lock);
+
 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
 EXPORT_SYMBOL(nf_hooks);
-static DEFINE_SPINLOCK(nf_hook_lock);
+#ifdef CONFIG_VE_IPTABLES
+#define ve_nf_hooks \
+       ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks))
+#else
+#define ve_nf_hooks nf_hooks
+#endif
+
 
 int nf_register_hook(struct nf_hook_ops *reg)
 {
 	struct list_head *i;
+	struct ve_struct *env;
+ 
+	env = get_exec_env();
+	if (!ve_is_super(env)) {
+		struct nf_hook_ops *tmp;
+		tmp = kmalloc(sizeof(struct nf_hook_ops), GFP_KERNEL);
+		if (!tmp)
+			return -ENOMEM;
+		memcpy(tmp, reg, sizeof(struct nf_hook_ops));
+		reg = tmp;
+	}
 
 	spin_lock_bh(&nf_hook_lock);
-	list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
+	list_for_each(i, &ve_nf_hooks[reg->pf][reg->hooknum]) {
 		if (reg->priority < ((struct nf_hook_ops *)i)->priority)
 			break;
 	}
@@ -55,11 +74,29 @@ EXPORT_SYMBOL(nf_register_hook);
 
 void nf_unregister_hook(struct nf_hook_ops *reg)
 {
+	struct nf_hook_ops *i;
+	struct ve_struct *env;
+
+	env = get_exec_env();
+	if (!ve_is_super(env)) {	/* non-super VE: search this VE's hook list */
+		list_for_each_entry_rcu(i,
+					&ve_nf_hooks[reg->pf][reg->hooknum], list) {
+			if (reg->hook == i->hook) {	/* matched by hook function only */
+				reg = i;
+				break;
+			}
+		}
+		if (reg != i)	/* loop ended without a match: nothing to remove */
+			return;
+	}
+	/* NOTE(review): lookup above runs without nf_hook_lock/rcu_read_lock() - confirm safe */
 	spin_lock_bh(&nf_hook_lock);
 	list_del_rcu(&reg->list);
 	spin_unlock_bh(&nf_hook_lock);
 
 	synchronize_net();
+	if (!ve_is_super(env))
+		kfree(reg);	/* reg is the list entry found above */
 }
 EXPORT_SYMBOL(nf_unregister_hook);
 
@@ -120,9 +157,9 @@ int nf_hook_slow(int pf, unsigned int ho
 	/* We may already have this, but read-locks nest anyway */
 	rcu_read_lock();
 
-	elem = &nf_hooks[pf][hook];
+	elem = &ve_nf_hooks[pf][hook];
 next_hook:
-	verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
+	verdict = nf_iterate(&ve_nf_hooks[pf][hook], pskb, hook, indev,
 			     outdev, &elem, okfn, hook_thresh);
 	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
 		ret = 1;
@@ -195,13 +232,54 @@ struct proc_dir_entry *proc_net_netfilte
 EXPORT_SYMBOL(proc_net_netfilter);
 #endif
 
-void __init netfilter_init(void)
+void init_nf_hooks(struct list_head (*nh)[NF_MAX_HOOKS])
 {
 	int i, h;
 	for (i = 0; i < NPROTO; i++) {
 		for (h = 0; h < NF_MAX_HOOKS; h++)
-			INIT_LIST_HEAD(&nf_hooks[i][h]);
+			INIT_LIST_HEAD(&nh[i][h]);	/* empty the table passed in as @nh */
 	}
+}
+
+/*
+ * Prepare the netfilter hook lists; returns 0 or -ENOMEM.
+ */
+int init_netfilter(void)
+{
+#ifdef CONFIG_VE_IPTABLES
+	struct ve_struct *ve = get_exec_env();
+
+	/* FIXME: charge ubc */
+	ve->_nf_hooks = kmalloc(sizeof(nf_hooks), GFP_KERNEL);
+	if (!ve->_nf_hooks)
+		return -ENOMEM;
+
+	init_nf_hooks(ve->_nf_hooks);
+#else
+	init_nf_hooks(nf_hooks);
+#endif
+	return 0;
+}
+EXPORT_SYMBOL(init_netfilter);
+
+#ifdef CONFIG_VE_IPTABLES
+/* Free the current environment's hook table and clear the pointer. */
+void fini_netfilter(void)
+{
+	struct ve_struct *envid = get_exec_env();
+
+	/* kfree(NULL) is a no-op, so no NULL guard is needed */
+	kfree(envid->_nf_hooks);
+	envid->_nf_hooks = NULL;
+
+	/* FIXME: uncharge ubc */
+}
+EXPORT_SYMBOL(fini_netfilter);
+#endif
+void __init netfilter_init(void)
+{
+	if (init_netfilter() < 0)	/* hook lists are required from boot on */
+		panic("cannot initialize nf_hooks");
 
 #ifdef CONFIG_PROC_FS
 	proc_net_netfilter = proc_mkdir("netfilter", proc_net);
@@ -214,3 +292,4 @@ void __init netfilter_init(void)
 	if (netfilter_log_init() < 0)
 		panic("cannot initialize nf_log");
 }
+
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/nf_conntrack_proto_sctp.c linux-2.6.16.46-0.12-027test011/net/netfilter/nf_conntrack_proto_sctp.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/nf_conntrack_proto_sctp.c	2007-08-24 19:28:07.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/nf_conntrack_proto_sctp.c	2007-08-28 17:35:30.000000000 +0400
@@ -467,7 +467,8 @@ static int sctp_new(struct nf_conn *conn
 					 SCTP_CONNTRACK_NONE, sch->type);
 
 		/* Invalid: delete conntrack */
-		if (newconntrack == SCTP_CONNTRACK_MAX) {
+		if (newconntrack == SCTP_CONNTRACK_NONE ||
+		    newconntrack == SCTP_CONNTRACK_MAX) {
 			DEBUGP("nf_conntrack_sctp: invalid new deleting.\n");
 			return 0;
 		}
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/nf_queue.c linux-2.6.16.46-0.12-027test011/net/netfilter/nf_queue.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/nf_queue.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/nf_queue.c	2007-08-28 17:35:32.000000000 +0400
@@ -209,12 +209,12 @@ void nf_reinject(struct sk_buff *skb, st
 	/* Drop reference to owner of hook which queued us. */
 	module_put(info->elem->owner);
 
-	list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
+	list_for_each_rcu(i, &ve_nf_hooks[info->pf][info->hook]) {
 		if (i == elem) 
   			break;
   	}
   
-	if (i == &nf_hooks[info->pf][info->hook]) {
+	if (i == &ve_nf_hooks[info->pf][info->hook]) {
 		/* The module which sent it to userspace is gone. */
 		NFDEBUG("%s: module disappeared, dropping packet.\n",
 			__FUNCTION__);
@@ -235,7 +235,7 @@ void nf_reinject(struct sk_buff *skb, st
 
 	if (verdict == NF_ACCEPT) {
 	next_hook:
-		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
+		verdict = nf_iterate(&ve_nf_hooks[info->pf][info->hook],
 				     &skb, info->hook, 
 				     info->indev, info->outdev, &elem,
 				     info->okfn, INT_MIN);
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/nf_sockopt.c linux-2.6.16.46-0.12-027test011/net/netfilter/nf_sockopt.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/nf_sockopt.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/nf_sockopt.c	2007-08-28 17:35:32.000000000 +0400
@@ -80,6 +80,11 @@ static int nf_sockopt(struct sock *sk, i
 	struct nf_sockopt_ops *ops;
 	int ret;
 
+#ifdef CONFIG_VE_IPTABLES
+	if (!get_exec_env()->_nf_hooks)
+               return -ENOPROTOOPT;
+#endif
+
 	if (down_interruptible(&nf_sockopt_mutex) != 0)
 		return -EINTR;
 
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/x_tables.c linux-2.6.16.46-0.12-027test011/net/netfilter/x_tables.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/x_tables.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/x_tables.c	2007-08-28 17:35:32.000000000 +0400
@@ -24,6 +24,10 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp.h>
+#include <linux/nfcalls.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_mem.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
@@ -40,6 +44,14 @@ struct xt_af {
 
 static struct xt_af *xt;
 
+#ifdef CONFIG_VE_IPTABLES
+/* include ve.h and define get_exec_env */
+#include <linux/sched.h>
+#define xt_tables(af)	(get_exec_env()->_xt_tables[af])
+#else
+#define xt_tables(af)	xt[af].tables
+#endif
+
 #ifdef DEBUG_IP_FIREWALL_USER
 #define duprintf(format, args...) printk(format , ## args)
 #else
@@ -52,6 +64,46 @@ enum {
 	MATCH,
 };
 
+#ifdef CONFIG_USER_RESOURCE
+/* Follow ->parent links from mem_ub(@info) up to the parentless beancounter. */
+static inline struct user_beancounter *xt_table_ub(struct xt_table_info *info)
+{
+	struct user_beancounter *top = mem_ub(info);
+
+	while (top->parent != NULL)
+		top = top->parent;
+	return top;
+}
+
+/* Return @size UB_NUMXTENT units to the beancounter found for @info. */
+static void uncharge_xtables(struct xt_table_info *info, unsigned long size)
+{
+	uncharge_beancounter(xt_table_ub(info), UB_NUMXTENT, size);
+}
+
+/*
+ * Charge or uncharge UB_NUMXTENT by new->number - old->number.  Returns 0,
+ * or -ENOMEM when charge_beancounter() reports failure.
+ */
+static int recharge_xtables(int check_ub,
+		struct xt_table_info *new, struct xt_table_info *old)
+{
+	struct user_beancounter *ub = xt_table_ub(new);
+	long delta = (long)new->number - (long)old->number;
+
+	BUG_ON(check_ub && ub != xt_table_ub(old));	/* owners must match */
+	if (delta < 0)
+		uncharge_beancounter(ub, UB_NUMXTENT, -delta);
+	else if (delta > 0 &&
+		 charge_beancounter(ub, UB_NUMXTENT, delta, UB_SOFT))
+		return -ENOMEM;
+	return 0;
+}
+#else
+#define recharge_xtables(c, new, old)	(0)
+#define uncharge_xtables(info, s)	do { } while (0)
+#endif	/* CONFIG_USER_RESOURCE */
+
 /* Registration hooks for targets. */
 int
 xt_register_target(int af, struct xt_target *target)
@@ -63,7 +115,7 @@ xt_register_target(int af, struct xt_tar
 		return ret;
 	list_add(&target->list, &xt[af].target);
 	up(&xt[af].mutex);
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(xt_register_target);
 
@@ -88,7 +140,7 @@ xt_register_match(int af, struct xt_matc
 	list_add(&match->list, &xt[af].match);
 	up(&xt[af].mutex);
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(xt_register_match);
 
@@ -246,19 +298,19 @@ struct xt_table_info *xt_alloc_table_inf
 	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
 		return NULL;
 
-	newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL);
+	newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL_UBC);
 	if (!newinfo)
 		return NULL;
 
-	newinfo->size = size;
+	newinfo->alloc_size = newinfo->size = size;
 
 	for_each_cpu(cpu) {
 		if (size <= PAGE_SIZE)
 			newinfo->entries[cpu] = kmalloc_node(size,
-							GFP_KERNEL,
+							GFP_KERNEL_UBC,
 							cpu_to_node(cpu));
 		else
-			newinfo->entries[cpu] = vmalloc_node(size,
+			newinfo->entries[cpu] = ub_vmalloc_node(size,
 							cpu_to_node(cpu));
 
 		if (newinfo->entries[cpu] == NULL) {
@@ -276,7 +328,7 @@ void xt_free_table_info(struct xt_table_
 	int cpu;
 
 	for_each_cpu(cpu) {
-		if (info->size <= PAGE_SIZE)
+		if (info->alloc_size <= PAGE_SIZE)
 			kfree(info->entries[cpu]);
 		else
 			vfree(info->entries[cpu]);
@@ -293,7 +345,7 @@ struct xt_table *xt_find_table_lock(int 
 	if (down_interruptible(&xt[af].mutex) != 0)
 		return ERR_PTR(-EINTR);
 
-	list_for_each_entry(t, &xt[af].tables, list)
+	list_for_each_entry(t, &xt_tables(af), list)
 		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
 			return t;
 	up(&xt[af].mutex);
@@ -328,6 +380,13 @@ xt_replace_table(struct xt_table *table,
 		return NULL;
 	}
 	oldinfo = private;
+
+	if (recharge_xtables(num_counters != 0, newinfo, oldinfo)) {
+		write_unlock_bh(&table->lock);
+		*error = -ENOMEM;
+		return NULL;
+	}
+
 	table->private = newinfo;
 	newinfo->initial_entries = oldinfo->initial_entries;
 	write_unlock_bh(&table->lock);
@@ -348,13 +407,14 @@ int xt_register_table(struct xt_table *t
 		return ret;
 
 	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&xt[table->af].tables, table->name)) {
+	if (list_named_find(&xt_tables(table->af), table->name)) {
 		ret = -EEXIST;
 		goto unlock;
 	}
 
 	/* Simplifies replace_table code. */
 	table->private = bootstrap;
+	rwlock_init(&table->lock);
 	if (!xt_replace_table(table, 0, newinfo, &ret))
 		goto unlock;
 
@@ -364,8 +424,7 @@ int xt_register_table(struct xt_table *t
 	/* save number of initial entries */
 	private->initial_entries = private->number;
 
-	rwlock_init(&table->lock);
-	list_prepend(&xt[table->af].tables, table);
+	list_prepend(&xt_tables(table->af), table);
 
 	ret = 0;
  unlock:
@@ -374,19 +433,67 @@ int xt_register_table(struct xt_table *t
 }
 EXPORT_SYMBOL_GPL(xt_register_table);
 
+/* Register @table; a non-super VE gets a private, module-pinned copy. */
+struct xt_table * virt_xt_register_table(struct xt_table *table,
+		      struct xt_table_info *bootstrap,
+		      struct xt_table_info *newinfo)
+{
+	struct module *mod = table->me;
+	int ret;
+
+	if (!ve_is_super(get_exec_env())) {
+		struct xt_table *copy;
+		__module_get(mod);
+		copy = ub_kmalloc(sizeof(*copy), GFP_KERNEL);
+		if (!copy) {
+			ret = -ENOMEM;
+			goto put_module;
+		}
+		memcpy(copy, table, sizeof(*copy));
+		table = copy;
+	}
+
+	ret = xt_register_table(table, bootstrap, newinfo);
+	if (!ret)
+		return table;
+
+	if (ve_is_super(get_exec_env()))
+		return ERR_PTR(ret);
+	kfree(table);
+put_module:
+	module_put(mod);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(virt_xt_register_table);
+
 void *xt_unregister_table(struct xt_table *table)
 {
 	struct xt_table_info *private;
 
 	down(&xt[table->af].mutex);
 	private = table->private;
-	LIST_DELETE(&xt[table->af].tables, table);
+	LIST_DELETE(&xt_tables(table->af), table);
 	up(&xt[table->af].mutex);
 
+	uncharge_xtables(private, private->number);
+
 	return private;
 }
 EXPORT_SYMBOL_GPL(xt_unregister_table);
 
+/* Unregister @table; in a non-super VE also module_put() and kfree() it. */
+void *virt_xt_unregister_table(struct xt_table *table)
+{
+	void *private = xt_unregister_table(table);
+
+	if (ve_is_super(get_exec_env()))
+		return private;
+	module_put(table->me);
+	kfree(table);
+	return private;
+}
+EXPORT_SYMBOL_GPL(virt_xt_unregister_table);
+
 #ifdef CONFIG_PROC_FS
 static char *xt_proto_prefix[NPROTO] = {
 	[AF_INET]	= "ip",
@@ -421,7 +528,7 @@ static struct list_head *type2list(u_int
 		list = &xt[af].match;
 		break;
 	case TABLE:
-		list = &xt[af].tables;
+		list = &xt_tables(af);
 		break;
 	default:
 		list = NULL;
@@ -534,6 +641,7 @@ int xt_proto_init(int af)
 		return -EINVAL;
 
 
+	INIT_LIST_HEAD(&xt_tables(af));
 #ifdef CONFIG_PROC_FS
 	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
@@ -619,6 +727,6 @@ static void __exit xt_fini(void)
 	kfree(xt);
 }
 
-module_init(xt_init);
+subsys_initcall(xt_init);
 module_exit(xt_fini);
 
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/xt_MARK.c linux-2.6.16.46-0.12-027test011/net/netfilter/xt_MARK.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/xt_MARK.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/xt_MARK.c	2007-08-28 17:35:32.000000000 +0400
@@ -79,19 +79,19 @@ checkentry_v0(const char *tablename,
 	struct xt_mark_target_info *markinfo = targinfo;
 
 	if (targinfosize != XT_ALIGN(sizeof(struct xt_mark_target_info))) {
-		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
+		ve_printk(VE_LOG, KERN_WARNING "MARK: targinfosize %u != %Zu\n",
 		       targinfosize,
 		       XT_ALIGN(sizeof(struct xt_mark_target_info)));
 		return 0;
 	}
 
 	if (strcmp(tablename, "mangle") != 0) {
-		printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		ve_printk(VE_LOG, KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
 		return 0;
 	}
 
 	if (markinfo->mark > 0xffffffff) {
-		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
+		ve_printk(VE_LOG, KERN_WARNING "MARK: Only supports 32bit wide mark\n");
 		return 0;
 	}
 
@@ -108,27 +108,27 @@ checkentry_v1(const char *tablename,
 	struct xt_mark_target_info_v1 *markinfo = targinfo;
 
 	if (targinfosize != XT_ALIGN(sizeof(struct xt_mark_target_info_v1))){
-		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
+		ve_printk(VE_LOG, KERN_WARNING "MARK: targinfosize %u != %Zu\n",
 		       targinfosize,
 		       XT_ALIGN(sizeof(struct xt_mark_target_info_v1)));
 		return 0;
 	}
 
 	if (strcmp(tablename, "mangle") != 0) {
-		printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		ve_printk(VE_LOG, KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
 		return 0;
 	}
 
 	if (markinfo->mode != XT_MARK_SET
 	    && markinfo->mode != XT_MARK_AND
 	    && markinfo->mode != XT_MARK_OR) {
-		printk(KERN_WARNING "MARK: unknown mode %u\n",
+		ve_printk(VE_LOG, KERN_WARNING "MARK: unknown mode %u\n",
 		       markinfo->mode);
 		return 0;
 	}
 
 	if (markinfo->mark > 0xffffffff) {
-		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
+		ve_printk(VE_LOG, KERN_WARNING "MARK: Only supports 32bit wide mark\n");
 		return 0;
 	}
 
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/xt_conntrack.c linux-2.6.16.46-0.12-027test011/net/netfilter/xt_conntrack.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/xt_conntrack.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/xt_conntrack.c	2007-08-28 17:35:32.000000000 +0400
@@ -20,6 +20,7 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_conntrack.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
@@ -213,10 +214,112 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct xt_conntrack_info *pinfo;
+	struct compat_xt_conntrack_info info;
+	u_int16_t msize;
+	/* Copy one native match entry out to *dstptr in compat layout. */
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_conntrack_info *)pm->data;
+	memset(&info, 0, sizeof(struct compat_xt_conntrack_info));	/* zeroed: the whole struct is copied out */
+	info.statemask = pinfo->statemask;
+	info.statusmask = pinfo->statusmask;
+	memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX *
+			sizeof(struct ip_conntrack_tuple));
+	memcpy(info.sipmsk, pinfo->sipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	memcpy(info.dipmsk, pinfo->dipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	info.expires_min = pinfo->expires_min;
+	info.expires_max = pinfo->expires_max;
+	info.flags = pinfo->flags;
+	info.invflags = pinfo->invflags;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&info, sizeof(struct compat_xt_conntrack_info)))
+		return -EFAULT;
+	msize -= off;	/* apply the native/compat size difference */
+	if (put_user(msize, (u_int16_t *)*dstptr))	/* overwrite the leading 16-bit size */
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;	/* advance the output cursor */
+	return 0;
+}
+
+static int compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_xt_conntrack_info *pinfo;
+	struct xt_conntrack_info info;
+	u_int16_t msize;
+	/* Expand one compat match entry into native layout at *dstptr. */
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_xt_conntrack_info *)pm->data;
+	memset(&info, 0, sizeof(struct xt_conntrack_info));
+	info.statemask = pinfo->statemask;
+	info.statusmask = pinfo->statusmask;
+	memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX *
+			sizeof(struct ip_conntrack_tuple));
+	memcpy(info.sipmsk, pinfo->sipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	memcpy(info.dipmsk, pinfo->dipmsk,
+			IP_CT_DIR_MAX * sizeof(struct in_addr));
+	info.expires_min = pinfo->expires_min;
+	info.expires_max = pinfo->expires_max;
+	info.flags = pinfo->flags;
+	info.invflags = pinfo->invflags;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&info, sizeof(struct xt_conntrack_info));
+	msize += off;	/* apply the native/compat size difference */
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;	/* advance the output cursor */
+	return 0;
+}
+
+/*
+ * Compat entry point for the conntrack match.  COMPAT_TO_USER and
+ * COMPAT_FROM_USER convert one match entry; COMPAT_CALC_SIZE only adds the
+ * native/compat size difference to *size.  Any other @convert value
+ * returns -ENOPROTOOPT.
+ */
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int off = XT_ALIGN(sizeof(struct xt_conntrack_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_conntrack_info));
+
+	switch (convert) {
+	case COMPAT_TO_USER:
+		return compat_to_user(match, dstptr, size, off);
+	case COMPAT_FROM_USER:
+		return compat_from_user(match, dstptr, size, off);
+	case COMPAT_CALC_SIZE:
+		*size += off;
+		return 0;
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+#endif
+
 static struct xt_match conntrack_match = {
 	.name		= "conntrack",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/xt_helper.c linux-2.6.16.46-0.12-027test011/net/netfilter/xt_helper.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/xt_helper.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/xt_helper.c	2007-08-28 17:35:36.000000000 +0400
@@ -24,6 +24,7 @@
 #endif
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_helper.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
@@ -148,16 +149,101 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct xt_helper_info *pinfo;
+	struct compat_xt_helper_info info;
+	u_int16_t msize;
+	/* Copy one native match entry out to *dstptr in compat layout. */
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_helper_info *)pm->data;
+	memset(&info, 0, sizeof(struct compat_xt_helper_info));	/* zeroed: the whole struct is copied out */
+	info.invert = pinfo->invert;
+	memcpy(info.name, pinfo->name, 30);	/* NOTE(review): 30 presumably equals the name[] size - confirm */
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&info, sizeof(struct compat_xt_helper_info)))
+		return -EFAULT;
+	msize -= off;	/* apply the native/compat size difference */
+	if (put_user(msize, (u_int16_t *)*dstptr))	/* overwrite the leading 16-bit size */
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;	/* advance the output cursor */
+	return 0;
+}
+
+static int compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_xt_helper_info *pinfo;
+	struct xt_helper_info info;
+	u_int16_t msize;
+	/* Expand one compat match entry into native layout at *dstptr. */
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_xt_helper_info *)pm->data;
+	memset(&info, 0, sizeof(struct xt_helper_info));
+	info.invert = pinfo->invert;
+	memcpy(info.name, pinfo->name, 30);	/* NOTE(review): 30 presumably equals the name[] size - confirm */
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&info, sizeof(struct xt_helper_info));
+	msize += off;	/* apply the native/compat size difference */
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;	/* advance the output cursor */
+	return 0;
+}
+
+/*
+ * Compat entry point for the helper match.  COMPAT_TO_USER and
+ * COMPAT_FROM_USER convert one match entry; COMPAT_CALC_SIZE only adds the
+ * native/compat size difference to *size.  Any other @convert value
+ * returns -ENOPROTOOPT.
+ */
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int off = XT_ALIGN(sizeof(struct xt_helper_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_helper_info));
+
+	switch (convert) {
+	case COMPAT_TO_USER:
+		return compat_to_user(match, dstptr, size, off);
+	case COMPAT_FROM_USER:
+		return compat_from_user(match, dstptr, size, off);
+	case COMPAT_CALC_SIZE:
+		*size += off;
+		return 0;
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+#endif
+
 static struct xt_match helper_match = {
 	.name		= "helper",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 static struct xt_match helper6_match = {
 	.name		= "helper",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/xt_length.c linux-2.6.16.46-0.12-027test011/net/netfilter/xt_length.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/xt_length.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/xt_length.c	2007-08-28 17:35:36.000000000 +0400
@@ -63,16 +63,34 @@ checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+/* Compat hook: the payload is the same struct in both layouts, so only the
+ * alignment difference is handed to ipt_match_align_compat(). */
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int pad = XT_ALIGN(sizeof(struct xt_length_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct xt_length_info));
+
+	return ipt_match_align_compat(match, dstptr, size, pad, convert);
+}
+#endif
+
 static struct xt_match length_match = {
 	.name		= "length",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 static struct xt_match length6_match = {
 	.name		= "length",
 	.match		= &match6,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/xt_limit.c linux-2.6.16.46-0.12-027test011/net/netfilter/xt_limit.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/xt_limit.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/xt_limit.c	2007-08-28 17:35:36.000000000 +0400
@@ -20,6 +20,7 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_limit.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
@@ -119,7 +120,7 @@ ipt_limit_checkentry(const char *tablena
 	/* Check for overflow. */
 	if (r->burst == 0
 	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
-		printk("Overflow in xt_limit, try lower: %u/%u\n",
+		ve_printk(VE_LOG, "Overflow in xt_limit, try lower: %u/%u\n",
 		       r->avg, r->burst);
 		return 0;
 	}
@@ -137,16 +138,104 @@ ipt_limit_checkentry(const char *tablena
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int ipt_limit_compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct xt_rateinfo *pinfo;
+	struct compat_xt_rateinfo rinfo;
+	u_int16_t msize;
+	/* Copy one native match entry out to *dstptr in compat layout. */
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_rateinfo *)pm->data;
+	memset(&rinfo, 0, sizeof(struct compat_xt_rateinfo));	/* zeroed: the whole struct is copied out */
+	rinfo.avg = pinfo->avg;
+	rinfo.burst = pinfo->burst;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&rinfo, sizeof(struct compat_xt_rateinfo)))
+		return -EFAULT;
+	msize -= off;	/* apply the native/compat size difference */
+	if (put_user(msize, (u_int16_t *)*dstptr))	/* overwrite the leading 16-bit size */
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;	/* advance the output cursor */
+	return 0;
+}
+
+static int ipt_limit_compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_xt_rateinfo *pinfo;
+	struct xt_rateinfo rinfo;
+	u_int16_t msize;
+	/* Expand one compat match entry into native layout at *dstptr. */
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_xt_rateinfo *)pm->data;
+	memset(&rinfo, 0, sizeof(struct xt_rateinfo));	/* fields other than avg/burst stay zero */
+	rinfo.avg = pinfo->avg;
+	rinfo.burst = pinfo->burst;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&rinfo, sizeof(struct xt_rateinfo));
+	msize += off;	/* apply the native/compat size difference */
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;	/* advance the output cursor */
+	return 0;
+}
+
+/*
+ * Compat entry point for the limit match.  COMPAT_TO_USER and
+ * COMPAT_FROM_USER convert one match entry; COMPAT_CALC_SIZE only adds the
+ * native/compat size difference to *size.  Any other @convert value
+ * returns -ENOPROTOOPT.
+ */
+static int ipt_limit_compat(void *match, void **dstptr,
+		int *size, int convert)
+{
+	int off = XT_ALIGN(sizeof(struct xt_rateinfo)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_rateinfo));
+
+	switch (convert) {
+	case COMPAT_TO_USER:
+		return ipt_limit_compat_to_user(match,
+				dstptr, size, off);
+	case COMPAT_FROM_USER:
+		return ipt_limit_compat_from_user(match,
+				dstptr, size, off);
+	case COMPAT_CALC_SIZE:
+		*size += off;
+		return 0;
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+#endif
+
 static struct xt_match ipt_limit_reg = {
 	.name		= "limit",
 	.match		= ipt_limit_match,
 	.checkentry	= ipt_limit_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_limit_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 static struct xt_match limit6_reg = {
 	.name		= "limit",
 	.match		= ipt_limit_match,
 	.checkentry	= ipt_limit_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= ipt_limit_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/xt_state.c linux-2.6.16.46-0.12-027test011/net/netfilter/xt_state.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/xt_state.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/xt_state.c	2007-08-28 17:35:36.000000000 +0400
@@ -13,6 +13,7 @@
 #include <net/netfilter/nf_conntrack_compat.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_state.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
@@ -55,10 +56,90 @@ static int check(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_to_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct ipt_entry_match *pm;
+	struct xt_state_info *pinfo;
+	struct compat_xt_state_info info;
+	u_int16_t msize;
+	/* Copy one native match entry out to *dstptr in compat layout. */
+	pm = (struct ipt_entry_match *)match;
+	msize = pm->u.user.match_size;
+	if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
+		return -EFAULT;
+	pinfo = (struct xt_state_info *)pm->data;
+	memset(&info, 0, sizeof(struct compat_xt_state_info));	/* zeroed: the whole struct is copied out */
+	info.statemask = pinfo->statemask;
+	if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
+				&info, sizeof(struct compat_xt_state_info)))
+		return -EFAULT;
+	msize -= off;	/* apply the native/compat size difference */
+	if (put_user(msize, (u_int16_t *)*dstptr))	/* overwrite the leading 16-bit size */
+		return -EFAULT;
+	*size -= off;
+	*dstptr += msize;	/* advance the output cursor */
+	return 0;
+}
+
+static int compat_from_user(void *match, void **dstptr,
+		int *size, int off)
+{
+	struct compat_ipt_entry_match *pm;
+	struct ipt_entry_match *dstpm;
+	struct compat_xt_state_info *pinfo;
+	struct xt_state_info info;
+	u_int16_t msize;
+	/* Expand one compat match entry into native layout at *dstptr. */
+	pm = (struct compat_ipt_entry_match *)match;
+	dstpm = (struct ipt_entry_match *)*dstptr;
+	msize = pm->u.user.match_size;
+	memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
+	pinfo = (struct compat_xt_state_info *)pm->data;
+	memset(&info, 0, sizeof(struct xt_state_info));
+	info.statemask = pinfo->statemask;
+	memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
+				&info, sizeof(struct xt_state_info));
+	msize += off;	/* apply the native/compat size difference */
+	dstpm->u.user.match_size = msize;
+	*size += off;
+	*dstptr += msize;	/* advance the output cursor */
+	return 0;
+}
+
+/*
+ * Compat entry point for the state match.  COMPAT_TO_USER and
+ * COMPAT_FROM_USER convert one match entry; COMPAT_CALC_SIZE only adds the
+ * native/compat size difference to *size.  Any other @convert value
+ * returns -ENOPROTOOPT.
+ */
+static int compat(void *match, void **dstptr, int *size, int convert)
+{
+	int off = XT_ALIGN(sizeof(struct xt_state_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct compat_xt_state_info));
+
+	switch (convert) {
+	case COMPAT_TO_USER:
+		return compat_to_user(match, dstptr, size, off);
+	case COMPAT_FROM_USER:
+		return compat_from_user(match, dstptr, size, off);
+	case COMPAT_CALC_SIZE:
+		*size += off;
+		return 0;
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+#endif
+
 static struct xt_match state_match = {
 	.name		= "state",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
@@ -66,6 +147,9 @@ static struct xt_match state6_match = {
 	.name		= "state",
 	.match		= &match,
 	.checkentry	= &check,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
@@ -73,8 +157,6 @@ static int __init init(void)
 {
 	int ret;
 
-	need_conntrack();
-
 	ret = xt_register_match(AF_INET, &state_match);
 	if (ret < 0)
 		return ret;
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/xt_tcpmss.c linux-2.6.16.46-0.12-027test011/net/netfilter/xt_tcpmss.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/xt_tcpmss.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/xt_tcpmss.c	2007-08-28 17:35:32.000000000 +0400
@@ -133,10 +133,25 @@ checkentry6(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;	/* same struct, so only the alignment padding differs */
+
+	off = XT_ALIGN(sizeof(struct xt_tcpmss_match_info)) -
+		COMPAT_XT_ALIGN(sizeof(struct xt_tcpmss_match_info));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+#endif
+
 static struct xt_match tcpmss_match = {
 	.name		= "tcpmss",
 	.match		= &match,
 	.checkentry	= &checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
@@ -144,6 +159,9 @@ static struct xt_match tcpmss6_match = {
 	.name		= "tcpmss",
 	.match		= &match,
 	.checkentry	= &checkentry6,
+#ifdef CONFIG_COMPAT
+	.compat		= &compat,
+#endif
 	.me		= THIS_MODULE,
 };
 
diff -upr linux-2.6.16.46-0.12.orig/net/netfilter/xt_tcpudp.c linux-2.6.16.46-0.12-027test011/net/netfilter/xt_tcpudp.c
--- linux-2.6.16.46-0.12.orig/net/netfilter/xt_tcpudp.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netfilter/xt_tcpudp.c	2007-08-28 17:35:36.000000000 +0400
@@ -266,10 +266,35 @@ udp6_checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+static int tcp_compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;	/* same struct, so only the alignment padding differs */
+
+	off = XT_ALIGN(sizeof(struct xt_tcp)) -
+		COMPAT_XT_ALIGN(sizeof(struct xt_tcp));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+
+static int udp_compat(void *match,
+		void **dstptr, int *size, int convert)
+{
+	int off;	/* same struct, so only the alignment padding differs */
+
+	off = XT_ALIGN(sizeof(struct xt_udp)) -
+		COMPAT_XT_ALIGN(sizeof(struct xt_udp));
+	return ipt_match_align_compat(match, dstptr, size, off, convert);
+}
+#endif
+
 static struct xt_match tcp_matchstruct = {
 	.name		= "tcp",
 	.match		= &tcp_match,
 	.checkentry	= &tcp_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &tcp_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 static struct xt_match tcp6_matchstruct = {
@@ -283,6 +308,9 @@ static struct xt_match udp_matchstruct =
 	.name		= "udp",
 	.match		= &udp_match,
 	.checkentry	= &udp_checkentry,
+#ifdef CONFIG_COMPAT
+	.compat		= &udp_compat,
+#endif
 	.me		= THIS_MODULE,
 };
 static struct xt_match udp6_matchstruct = {
diff -upr linux-2.6.16.46-0.12.orig/net/netlink/af_netlink.c linux-2.6.16.46-0.12-027test011/net/netlink/af_netlink.c
--- linux-2.6.16.46-0.12.orig/net/netlink/af_netlink.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/netlink/af_netlink.c	2007-08-28 17:35:33.000000000 +0400
@@ -61,27 +61,14 @@
 #include <net/sock.h>
 #include <net/scm.h>
 #include <net/netlink.h>
+#include <net/netlink_sock.h>
+
+#include <ub/beancounter.h>
+#include <ub/ub_net.h>
 
 #define Nprintk(a...)
 #define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
 
-struct netlink_sock {
-	/* struct sock has to be the first member of netlink_sock */
-	struct sock		sk;
-	u32			pid;
-	u32			dst_pid;
-	u32			dst_group;
-	u32			flags;
-	u32			subscriptions;
-	u32			ngroups;
-	unsigned long		*groups;
-	unsigned long		state;
-	wait_queue_head_t	wait;
-	struct netlink_callback	*cb;
-	spinlock_t		cb_lock;
-	void			(*data_ready)(struct sock *sk, int bytes);
-	struct module		*module;
-};
 
 #define NETLINK_KERNEL_SOCKET	0x1
 #define NETLINK_RECV_PKTINFO	0x2
@@ -210,7 +197,10 @@ static __inline__ struct sock *netlink_l
 	read_lock(&nl_table_lock);
 	head = nl_pid_hashfn(hash, pid);
 	sk_for_each(sk, node, head) {
-		if (nlk_sk(sk)->pid == pid) {
+		/* VEs should find sockets created by the kernel */
+		if ((nlk_sk(sk)->pid == pid) &&
+				(!pid || ve_accessible_strict(sk->owner_env,
+							      get_exec_env()))){
 			sock_hold(sk);
 			goto found;
 		}
@@ -310,7 +300,9 @@ static int netlink_insert(struct sock *s
 	head = nl_pid_hashfn(hash, pid);
 	len = 0;
 	sk_for_each(osk, node, head) {
-		if (nlk_sk(osk)->pid == pid)
+		/* collide only with sockets visible from this VE */
+		if ((nlk_sk(osk)->pid == pid) &&
+		    ve_accessible_strict(osk->owner_env, get_exec_env()))
 			break;
 		len++;
 	}
@@ -363,6 +355,8 @@ static int __netlink_create(struct socke
 	sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
 	if (!sk)
 		return -ENOMEM;
+	if (ub_other_sock_charge(sk))
+		goto out_free;
 
 	sock_init_data(sock, sk);
 
@@ -373,6 +367,10 @@ static int __netlink_create(struct socke
 	sk->sk_destruct = netlink_sock_destruct;
 	sk->sk_protocol = protocol;
 	return 0;
+
+out_free:
+	sk_free(sk);
+	return -ENOMEM;
 }
 
 static int netlink_create(struct socket *sock, int protocol)
@@ -478,7 +476,7 @@ static int netlink_autobind(struct socke
 	struct hlist_head *head;
 	struct sock *osk;
 	struct hlist_node *node;
-	s32 pid = current->tgid;
+	s32 pid = virt_pid(current);
 	int err;
 	static s32 rover = -4097;
 
@@ -487,7 +485,9 @@ retry:
 	netlink_table_grab();
 	head = nl_pid_hashfn(hash, pid);
 	sk_for_each(osk, node, head) {
-		if (nlk_sk(osk)->pid == pid) {
+		if ((nlk_sk(osk)->pid == pid) &&
+				ve_accessible_strict(osk->owner_env,
+					get_exec_env())) {
 			/* Bind collision, search negative pid values. */
 			pid = rover--;
 			if (rover > -4097)
@@ -512,7 +512,7 @@ retry:
 static inline int netlink_capable(struct socket *sock, unsigned int flag) 
 { 
 	return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
-	       capable(CAP_NET_ADMIN);
+	       capable(CAP_VE_NET_ADMIN);
 } 
 
 static void
@@ -846,6 +846,9 @@ static inline int do_one_broadcast(struc
 	    !test_bit(p->group - 1, nlk->groups))
 		goto out;
 
+	if (!ve_accessible_strict(get_exec_env(), sk->owner_env))
+		goto out;
+
 	if (p->failure) {
 		netlink_overrun(sk);
 		goto out;
@@ -943,6 +946,9 @@ static inline int do_one_set_err(struct 
 	    !test_bit(p->group - 1, nlk->groups))
 		goto out;
 
+	if (!ve_accessible_strict(get_exec_env(), sk->owner_env))
+		goto out;
+
 	sk->sk_err = p->code;
 	sk->sk_error_report(sk);
 out:
@@ -1077,12 +1083,17 @@ static int netlink_sendmsg(struct kiocb 
 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
 	struct sock *sk = sock->sk;
 	struct netlink_sock *nlk = nlk_sk(sk);
-	struct sockaddr_nl *addr=msg->msg_name;
+	struct sockaddr_nl *addr = msg->msg_name;
 	u32 dst_pid;
-	u32 dst_group;
 	struct sk_buff *skb;
 	int err;
 	struct scm_cookie scm;
+	struct sock *dstsk;
+	long timeo;
+	int no_ubc, no_buf;
+	unsigned long chargesize;
+
+	DECLARE_WAITQUEUE(wait, current);
 
 	if (msg->msg_flags&MSG_OOB)
 		return -EOPNOTSUPP;
@@ -1093,17 +1104,16 @@ static int netlink_sendmsg(struct kiocb 
 	if (err < 0)
 		return err;
 
+	/* Broadcasts from user to kernel are disabled. This is OK
+	 * according to ANK */
 	if (msg->msg_namelen) {
 		if (addr->nl_family != AF_NETLINK)
 			return -EINVAL;
 		dst_pid = addr->nl_pid;
-		dst_group = ffs(addr->nl_groups);
-		if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
+		if (addr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
 			return -EPERM;
-	} else {
+	} else
 		dst_pid = nlk->dst_pid;
-		dst_group = nlk->dst_group;
-	}
 
 	if (!nlk->pid) {
 		err = netlink_autobind(sock);
@@ -1116,12 +1126,12 @@ static int netlink_sendmsg(struct kiocb 
 		goto out;
 	err = -ENOBUFS;
 	skb = alloc_skb(len, GFP_KERNEL);
-	if (skb==NULL)
+	if (skb == NULL)
 		goto out;
 
 	NETLINK_CB(skb).pid	= nlk->pid;
 	NETLINK_CB(skb).dst_pid = dst_pid;
-	NETLINK_CB(skb).dst_group = dst_group;
+	NETLINK_CB(skb).dst_group = 0;
 	NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
 	selinux_get_task_sid(current, &(NETLINK_CB(skb).sid));
 	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
@@ -1133,25 +1143,88 @@ static int netlink_sendmsg(struct kiocb 
 	 */
 
 	err = -EFAULT;
-	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
-		kfree_skb(skb);
-		goto out;
-	}
+	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))
+		goto out_free;
 
 	err = security_netlink_send(sk, skb);
-	if (err) {
-		kfree_skb(skb);
-		goto out;
+	if (err)
+		goto out_free;
+
+	timeo = sock_sndtimeo(sk, msg->msg_flags&MSG_DONTWAIT);
+retry:
+	dstsk = netlink_getsockbypid(sk, dst_pid);
+	if (IS_ERR(dstsk)) {
+		err = PTR_ERR(dstsk);
+		goto out_free;
 	}
 
-	if (dst_group) {
-		atomic_inc(&skb->users);
-		netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
+	nlk = nlk_sk(dstsk);
+#ifdef NL_EMULATE_DEV
+	if (nlk->handler) {
+		skb_orphan(skb);
+		err = nlk->handler(protocol, skb);
+		goto out_put;
 	}
-	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
+#endif
+
+	/* BTW, it could be done once, before the retry loop */
+	chargesize = skb_charge_fullsize(skb);
+	no_ubc = ub_sock_getwres_other(sk, chargesize);
+	no_buf = atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
+		test_bit(0, &nlk->state);
+	if (no_ubc || no_buf) {
+		wait_queue_head_t *sleep;
+
+		if (!no_ubc)
+			ub_sock_retwres_other(sk, chargesize,
+					      SOCK_MIN_UBCSPACE_CH);
+		err = -EAGAIN;
+		if (timeo == 0) {
+			kfree_skb(skb);
+			goto out_put;
+		}
+
+		/* wake up comes to different queues */
+		sleep = no_ubc ? sk->sk_sleep : &nlk->wait;
+		__set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(sleep, &wait);
 
+		/* this 'if' can't be moved up because ub_sock_snd_queue_add()
+		 * may change the task state to TASK_RUNNING */
+		if (no_ubc)
+			ub_sock_sndqueueadd_other(sk, chargesize);
+
+		if ((atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
+		     test_bit(0, &nlk->state) || no_ubc) &&
+		    !sock_flag(dstsk, SOCK_DEAD))
+			timeo = schedule_timeout(timeo);
+
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(sleep, &wait);
+		if (no_ubc)
+			ub_sock_sndqueuedel(sk);
+		sock_put(dstsk);
+
+		if (!signal_pending(current))
+			goto retry;
+		err = sock_intr_errno(timeo);
+		goto out_free;
+	}
+
+	skb_orphan(skb);
+	skb_set_owner_r(skb, dstsk);
+	ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF);
+	skb_queue_tail(&dstsk->sk_receive_queue, skb);
+	dstsk->sk_data_ready(dstsk, len);
+	err = len;
+out_put:
+	sock_put(dstsk);
 out:
 	return err;
+
+out_free:
+	kfree_skb(skb);
+	return err;
 }
 
 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
@@ -1305,6 +1378,10 @@ static int netlink_dump(struct sock *sk)
 	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
+	if (ub_nlrcvbuf_charge(skb, sk) < 0) {
+		kfree_skb(skb);
+		return -EACCES;
+	}
 
 	spin_lock(&nlk->cb_lock);
 
@@ -1473,8 +1550,15 @@ void netlink_run_queue(struct sock *sk, 
 		*qlen = skb_queue_len(&sk->sk_receive_queue);
 
 	for (; *qlen; (*qlen)--) {
+		int ret;
+		struct ve_struct *old_env;
 		skb = skb_dequeue(&sk->sk_receive_queue);
-		if (netlink_rcv_skb(skb, cb)) {
+
+		old_env = set_exec_env(skb->owner_env);
+		ret = netlink_rcv_skb(skb, cb);
+		(void)set_exec_env(old_env);
+
+		if (ret) {
 			if (skb->len)
 				skb_queue_head(&sk->sk_receive_queue, skb);
 			else {
@@ -1742,6 +1826,7 @@ enomem:
 
 	sock_register(&netlink_family_ops);
 #ifdef CONFIG_PROC_FS
+	/* FIXME: virtualize before giving access from VEs */
 	proc_net_fops_create("netlink", 0, &netlink_seq_fops);
 #endif
 	/* The netlink device handler may be needed early. */ 
diff -upr linux-2.6.16.46-0.12.orig/net/netlink/attr.c linux-2.6.16.46-0.12-027test011/net/netlink/attr.c
--- linux-2.6.16.46-0.12.orig/net/netlink/attr.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/netlink/attr.c	2007-08-28 17:35:32.000000000 +0400
@@ -119,7 +119,7 @@ int nla_parse(struct nlattr *tb[], int m
 	}
 
 	if (unlikely(rem > 0))
-		printk(KERN_WARNING "netlink: %d bytes leftover after parsing "
+		ve_printk(VE_LOG, KERN_WARNING "netlink: %d bytes leftover after parsing "
 		       "attributes.\n", rem);
 
 	err = 0;
diff -upr linux-2.6.16.46-0.12.orig/net/packet/af_packet.c linux-2.6.16.46-0.12-027test011/net/packet/af_packet.c
--- linux-2.6.16.46-0.12.orig/net/packet/af_packet.c	2007-08-24 19:28:09.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/packet/af_packet.c	2007-08-28 17:35:32.000000000 +0400
@@ -79,6 +79,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
+#include <ub/ub_net.h>
+
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
 #endif
@@ -280,7 +282,8 @@ static int packet_rcv_spkt(struct sk_buf
 	 *	so that this procedure is noop.
 	 */
 
-	if (skb->pkt_type == PACKET_LOOPBACK)
+	if (skb->pkt_type == PACKET_LOOPBACK ||
+			!ve_accessible(skb->owner_env, sk->owner_env))
 		goto out;
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
@@ -472,6 +475,11 @@ static int packet_rcv(struct sk_buff *sk
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	if (!ve_accessible(skb->owner_env, sk->owner_env))
+		goto drop;
+
+	skb_orphan(skb);
+
 	skb->dev = dev;
 
 	if (dev->hard_header) {
@@ -531,6 +539,9 @@ static int packet_rcv(struct sk_buff *sk
 	if (pskb_trim(skb, snaplen))
 		goto drop_n_acct;
 
+	if (ub_sockrcvbuf_charge(sk, skb))
+		goto drop_n_acct;
+
 	skb_set_owner_r(skb, sk);
 	skb->dev = NULL;
 	dst_release(skb->dst);
@@ -581,6 +592,11 @@ static int tpacket_rcv(struct sk_buff *s
 	sk = pt->af_packet_priv;
 	po = pkt_sk(sk);
 
+	if (!ve_accessible(skb->owner_env, sk->owner_env))
+		goto drop;
+
+	skb_orphan(skb);
+
 	if (dev->hard_header) {
 		if (sk->sk_type != SOCK_DGRAM)
 			skb_push(skb, skb->data - skb->mac.raw);
@@ -630,6 +646,12 @@ static int tpacket_rcv(struct sk_buff *s
 	if (snaplen > skb->len-skb->data_len)
 		snaplen = skb->len-skb->data_len;
 
+	if (copy_skb &&
+	    ub_sockrcvbuf_charge(sk, copy_skb)) {
+		spin_lock(&sk->sk_receive_queue.lock);
+		goto ring_is_full;
+	}
+
 	spin_lock(&sk->sk_receive_queue.lock);
 	h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
 	
@@ -1010,6 +1032,8 @@ static int packet_create(struct socket *
 	sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
 	if (sk == NULL)
 		goto out;
+	if (ub_other_sock_charge(sk))
+		goto out_free;
 
 	sock->ops = &packet_ops;
 #ifdef CONFIG_SOCK_PACKET
@@ -1048,6 +1072,9 @@ static int packet_create(struct socket *
 	sk_add_node(sk, &packet_sklist);
 	write_unlock_bh(&packet_sklist_lock);
 	return(0);
+
+out_free:
+	sk_free(sk);
 out:
 	return err;
 }
@@ -1430,11 +1457,16 @@ static int packet_notifier(struct notifi
 	struct sock *sk;
 	struct hlist_node *node;
 	struct net_device *dev = (struct net_device*)data;
+	struct ve_struct *ve;
 
+	ve = get_exec_env();
 	read_lock(&packet_sklist_lock);
 	sk_for_each(sk, node, &packet_sklist) {
 		struct packet_sock *po = pkt_sk(sk);
 
+		if (!ve_accessible_strict(sk->owner_env, ve))
+			continue;
+
 		switch (msg) {
 		case NETDEV_UNREGISTER:
 #ifdef CONFIG_PACKET_MULTICAST
@@ -1845,6 +1877,8 @@ static inline struct sock *packet_seq_id
 	struct hlist_node *node;
 
 	sk_for_each(s, node, &packet_sklist) {
+		if (!ve_accessible(s->owner_env, get_exec_env()))
+			continue;
 		if (!off--)
 			return s;
 	}
@@ -1860,9 +1894,14 @@ static void *packet_seq_start(struct seq
 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	++*pos;
-	return  (v == SEQ_START_TOKEN) 
-		? sk_head(&packet_sklist) 
-		: sk_next((struct sock*)v) ;
+	do {
+		v = (v == SEQ_START_TOKEN)
+			? sk_head(&packet_sklist)
+			: sk_next((struct sock*)v);
+	} while (v != NULL &&
+			!ve_accessible(((struct sock*)v)->owner_env,
+				get_exec_env()));
+	return v;
 }
 
 static void packet_seq_stop(struct seq_file *seq, void *v)
@@ -1918,7 +1957,7 @@ static struct file_operations packet_seq
 
 static void __exit packet_exit(void)
 {
-	proc_net_remove("packet");
+	remove_proc_glob_entry("net/packet", NULL);
 	unregister_netdevice_notifier(&packet_netdev_notifier);
 	sock_unregister(PF_PACKET);
 	proto_unregister(&packet_proto);
@@ -1933,7 +1972,7 @@ static int __init packet_init(void)
 
 	sock_register(&packet_family_ops);
 	register_netdevice_notifier(&packet_netdev_notifier);
-	proc_net_fops_create("packet", 0, &packet_seq_fops);
+	proc_glob_fops_create("net/packet", 0, &packet_seq_fops);
 out:
 	return rc;
 }
diff -upr linux-2.6.16.46-0.12.orig/net/sched/sch_generic.c linux-2.6.16.46-0.12-027test011/net/sched/sch_generic.c
--- linux-2.6.16.46-0.12.orig/net/sched/sch_generic.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/sched/sch_generic.c	2007-08-28 17:35:32.000000000 +0400
@@ -97,6 +97,7 @@ static inline int qdisc_restart(struct n
 	/* Dequeue packet */
 	if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
 		unsigned nolock = (dev->features & NETIF_F_LLTX);
+		struct ve_struct *envid;
 
 		dev->gso_skb = NULL;
 
@@ -109,6 +110,7 @@ static inline int qdisc_restart(struct n
 		 * of lock congestion it should return -1 and the packet
 		 * will be requeued.
 		 */
+		envid = set_exec_env(skb->owner_env);
 		if (!nolock) {
 			if (!netif_tx_trylock(dev)) {
 			collision:
@@ -123,6 +125,7 @@ static inline int qdisc_restart(struct n
 					kfree_skb(skb);
 					if (net_ratelimit())
 						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+					(void)set_exec_env(envid);
 					return -1;
 				}
 				__get_cpu_var(netdev_rx_stat).cpu_collision++;
@@ -143,6 +146,7 @@ static inline int qdisc_restart(struct n
 						netif_tx_unlock(dev);
 					}
 					spin_lock(&dev->queue_lock);
+					(void)set_exec_env(envid);
 					return -1;
 				}
 				if (ret == NETDEV_TX_LOCKED && nolock) {
@@ -176,6 +180,7 @@ requeue:
 		else
 			q->ops->requeue(skb, q);
 		netif_schedule(dev);
+		(void)set_exec_env(envid);
 		return 1;
 	}
 	BUG_ON((int) q->q.qlen < 0);
@@ -619,3 +624,4 @@ EXPORT_SYMBOL(qdisc_destroy);
 EXPORT_SYMBOL(qdisc_reset);
 EXPORT_SYMBOL(qdisc_lock_tree);
 EXPORT_SYMBOL(qdisc_unlock_tree);
+EXPORT_SYMBOL(dev_shutdown);
diff -upr linux-2.6.16.46-0.12.orig/net/sched/sch_teql.c linux-2.6.16.46-0.12-027test011/net/sched/sch_teql.c
--- linux-2.6.16.46-0.12.orig/net/sched/sch_teql.c	2007-08-24 19:28:35.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/sched/sch_teql.c	2007-08-28 17:35:31.000000000 +0400
@@ -189,6 +189,9 @@ static int teql_qdisc_init(struct Qdisc 
 	struct teql_master *m = (struct teql_master*)sch->ops;
 	struct teql_sched_data *q = qdisc_priv(sch);
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (dev->hard_header_len > m->dev->hard_header_len)
 		return -EINVAL;
 
diff -upr linux-2.6.16.46-0.12.orig/net/socket.c linux-2.6.16.46-0.12-027test011/net/socket.c
--- linux-2.6.16.46-0.12.orig/net/socket.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/socket.c	2007-08-28 17:35:32.000000000 +0400
@@ -84,6 +84,7 @@
 #include <linux/compat.h>
 #include <linux/kmod.h>
 #include <linux/audit.h>
+#include <linux/in.h>
 
 #ifdef CONFIG_NET_RADIO
 #include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
@@ -201,15 +202,6 @@ static DEFINE_PER_CPU(int, sockets_in_us
  *	divide and look after the messy bits.
  */
 
-#define MAX_SOCK_ADDR	128		/* 108 for Unix domain - 
-					   16 for IP, 16 for IPX,
-					   24 for IPv6,
-					   about 80 for AX.25 
-					   must be at least one bigger than
-					   the AF_UNIX size (see net/unix/af_unix.c
-					   :unix_mkname()).  
-					 */
-					 
 /**
  *	move_addr_to_kernel	-	copy a socket address into kernel space
  *	@uaddr: Address in user space
@@ -1078,6 +1070,48 @@ int sock_wake_async(struct socket *sock,
 	return 0;
 }
 
+int vz_security_family_check(int family)
+{
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))	/* super env: no restriction */
+		return 0;
+
+	switch (family) {	/* families allowed for a non-super env */
+	case PF_UNSPEC:
+	case PF_PACKET:
+	case PF_NETLINK:
+	case PF_UNIX:
+	case PF_INET:
+	case PF_INET6:
+		break;
+	default:
+		return -EAFNOSUPPORT;
+	}
+#endif
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vz_security_family_check);
+
+int vz_security_protocol_check(int protocol)
+{
+#ifdef CONFIG_VE
+	if (ve_is_super(get_exec_env()))	/* super env: no restriction */
+		return 0;
+
+	switch (protocol) {	/* protocols allowed for a non-super env */
+	case  IPPROTO_IP:
+	case  IPPROTO_TCP:
+	case  IPPROTO_UDP:
+	case  IPPROTO_RAW:
+		break;
+	default:
+		return -EAFNOSUPPORT;	/* NOTE(review): -EPROTONOSUPPORT? */
+	}
+#endif
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vz_security_protocol_check);
+
 static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
 {
 	int err;
@@ -1105,6 +1139,11 @@ static int __sock_create(int family, int
 		family = PF_PACKET;
 	}
 
+	/* VZ compatibility layer */
+	err = vz_security_family_check(family);
+	if (err < 0)
+		return err;
+
 	err = security_socket_create(family, type, protocol, kern);
 	if (err)
 		return err;
diff -upr linux-2.6.16.46-0.12.orig/net/sunrpc/clnt.c linux-2.6.16.46-0.12-027test011/net/sunrpc/clnt.c
--- linux-2.6.16.46-0.12.orig/net/sunrpc/clnt.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/sunrpc/clnt.c	2007-08-28 17:35:33.000000000 +0400
@@ -65,6 +65,35 @@ static u32 *	call_header(struct rpc_task
 static u32 *	call_verify(struct rpc_task *task);
 
 
+/*
+ * Grand abort timeout (stop the client if it occurs)
+ */
+int xprt_abort_timeout = RPC_MAX_ABORT_TIMEOUT;
+
+static int rpc_abort_hard(struct rpc_task *task)
+{
+	struct rpc_clnt *clnt;
+	clnt = task->tk_client;
+
+	if (clnt->cl_pr_time == 0) {	/* no timestamp yet: record the start */
+		clnt->cl_pr_time = jiffies;
+		return 0;
+	}
+	if (xprt_abort_timeout == RPC_MAX_ABORT_TIMEOUT)
+		return 0;	/* timeout at its maximum: never abort */
+	if (time_before(jiffies, clnt->cl_pr_time + xprt_abort_timeout * HZ))
+		return 0;	/* abort period has not elapsed yet */
+
+	clnt->cl_broken = 1;	/* mark the client broken, then kill its tasks */
+	rpc_killall_tasks(clnt);
+	return -ETIMEDOUT;
+}
+
+static void rpc_abort_clear(struct rpc_task *task)
+{
+	task->tk_client->cl_pr_time = 0;	/* rpc_abort_hard() restamps on 0 */
+}
+
 static int
 rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
 {
@@ -176,10 +205,10 @@ rpc_new_client(struct rpc_xprt *xprt, ch
 	}
 
 	/* save the nodename */
-	clnt->cl_nodelen = strlen(system_utsname.nodename);
+	clnt->cl_nodelen = strlen(ve_utsname.nodename);
 	if (clnt->cl_nodelen > UNX_MAXNODENAME)
 		clnt->cl_nodelen = UNX_MAXNODENAME;
-	memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
+	memcpy(clnt->cl_nodename, ve_utsname.nodename, clnt->cl_nodelen);
 	return clnt;
 
 out_no_auth:
@@ -252,6 +281,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	new->cl_autobind = 0;
 	new->cl_oneshot = 0;
 	new->cl_dead = 0;
+	new->cl_broken = 0;
 	if (!IS_ERR(new->cl_dentry)) {
 		dget(new->cl_dentry);
 		rpc_get_mount();
@@ -451,7 +481,7 @@ int rpc_call_sync(struct rpc_clnt *clnt,
 	int		status;
 
 	/* If this client is slain all further I/O fails */
-	if (clnt->cl_dead) 
+	if (clnt->cl_dead || clnt->cl_broken) 
 		return -EIO;
 
 	BUG_ON(flags & RPC_TASK_ASYNC);
@@ -492,7 +522,7 @@ rpc_call_async(struct rpc_clnt *clnt, st
 	int		status;
 
 	/* If this client is slain all further I/O fails */
-	if (clnt->cl_dead) 
+	if (clnt->cl_dead || clnt->cl_broken) 
 		return -EIO;
 
 	flags |= RPC_TASK_ASYNC;
@@ -802,6 +832,7 @@ call_bind_status(struct rpc_task *task)
 	if (task->tk_status >= 0) {
 		dprintk("RPC: %4d call_bind_status (status %d)\n",
 					task->tk_pid, task->tk_status);
+		rpc_abort_clear(task);
 		task->tk_status = 0;
 		task->tk_action = call_connect;
 		return;
@@ -816,7 +847,7 @@ call_bind_status(struct rpc_task *task)
 	case -ETIMEDOUT:
 		dprintk("RPC: %4d rpcbind request timed out\n",
 				task->tk_pid);
-		if (RPC_IS_SOFT(task)) {
+		if (RPC_IS_SOFT(task) || rpc_abort_hard(task)) {
 			status = -EIO;
 			break;
 		}
@@ -892,8 +923,10 @@ call_connect_status(struct rpc_task *tas
 	case -ENOTCONN:
 	case -ETIMEDOUT:
 	case -EAGAIN:
-		task->tk_action = call_bind;
-		break;
+		if (!rpc_abort_hard(task)) {
+			task->tk_action = call_bind;
+			break;
+		}	/* rpc_abort_hard() failed: fall through to -EIO */
 	default:
 		rpc_exit(task, -EIO);
 		break;
@@ -1018,7 +1051,7 @@ call_timeout(struct rpc_task *task)
 	}
 
 	dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
-	if (RPC_IS_SOFT(task)) {
+	if (RPC_IS_SOFT(task) || rpc_abort_hard(task)) {
 		printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
 				clnt->cl_protname, clnt->cl_server);
 		rpc_exit(task, -EIO);
@@ -1059,7 +1092,7 @@ call_decode(struct rpc_task *task)
 	}
 
 	if (task->tk_status < 12) {
-		if (!RPC_IS_SOFT(task)) {
+		if (!RPC_IS_SOFT(task) && !rpc_abort_hard(task)) {
 			task->tk_action = call_bind;
 			clnt->cl_stats->rpcretrans++;
 			goto out_retry;
@@ -1070,6 +1103,7 @@ call_decode(struct rpc_task *task)
 		return;
 	}
 
+	rpc_abort_clear(task);
 	req->rq_rcv_buf.len = req->rq_private_buf.len;
 
 	/* Check that the softirq receive buffer is valid */
diff -upr linux-2.6.16.46-0.12.orig/net/sunrpc/rpc_pipe.c linux-2.6.16.46-0.12-027test011/net/sunrpc/rpc_pipe.c
--- linux-2.6.16.46-0.12.orig/net/sunrpc/rpc_pipe.c	2007-08-24 19:28:25.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/sunrpc/rpc_pipe.c	2007-08-28 17:35:33.000000000 +0400
@@ -826,6 +826,7 @@ static struct file_system_type rpc_pipe_
 	.name		= "rpc_pipefs",
 	.get_sb		= rpc_get_sb,
 	.kill_sb	= kill_litter_super,
+	.fs_flags	= FS_VIRTUALIZED,	
 };
 
 static void
diff -upr linux-2.6.16.46-0.12.orig/net/sunrpc/sched.c linux-2.6.16.46-0.12-027test011/net/sunrpc/sched.c
--- linux-2.6.16.46-0.12.orig/net/sunrpc/sched.c	2007-08-24 19:28:11.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/sunrpc/sched.c	2007-08-28 17:35:33.000000000 +0400
@@ -323,16 +323,6 @@ static void rpc_make_runnable(struct rpc
 }
 
 /*
- * Place a newly initialized task on the workqueue.
- */
-static inline void
-rpc_schedule_run(struct rpc_task *task)
-{
-	rpc_set_active(task);
-	rpc_make_runnable(task);
-}
-
-/*
  * Prepare for sleeping on a wait queue.
  * By always appending tasks to the list we ensure FIFO behavior.
  * NB: An RPC task will only receive interrupt-driven events as long
@@ -607,7 +597,9 @@ EXPORT_SYMBOL(rpc_exit_task);
 static int __rpc_execute(struct rpc_task *task)
 {
 	int		status = 0;
+	struct ve_struct *env;
 
+	env = set_exec_env(task->tk_client->cl_xprt->owner_env);
 	dprintk("RPC: %4d rpc_execute flgs %x\n",
 				task->tk_pid, task->tk_flags);
 
@@ -662,10 +654,14 @@ static int __rpc_execute(struct rpc_task
 		rpc_clear_running(task);
 		if (RPC_IS_ASYNC(task)) {
 			/* Careful! we may have raced... */
-			if (RPC_IS_QUEUED(task))
+			if (RPC_IS_QUEUED(task)) {
+				(void)set_exec_env(env);
 				return 0;
-			if (rpc_test_and_set_running(task))
+			}
+			if (rpc_test_and_set_running(task)) {
+				(void)set_exec_env(env);
 				return 0;
+			}
 			continue;
 		}
 
@@ -696,6 +692,7 @@ static int __rpc_execute(struct rpc_task
 	rpc_mark_complete_task(task);
 	/* Release all resources associated with the task */
 	rpc_release_task(task);
+	(void)set_exec_env(env);
 	return status;
 }
 
@@ -814,6 +811,13 @@ void rpc_init_task(struct rpc_task *task
 	/* Add to global list of all tasks */
 	spin_lock(&rpc_sched_lock);
 	list_add_tail(&task->tk_task, &all_tasks);
+
+	/* Prevent the task from running if the client is marked as dead */
+	if (task->tk_client != NULL && task->tk_client->cl_dead) {
+		task->tk_flags |= RPC_TASK_KILLED;
+		rpc_exit(task, -EIO);
+		rpc_wake_up_task(task);
+	}
 	spin_unlock(&rpc_sched_lock);
 
 	BUG_ON(task->tk_ops == NULL);
@@ -986,10 +990,12 @@ fail:
 
 void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
 {
+	rpc_set_active(child);
+
 	spin_lock_bh(&childq.lock);
 	/* N.B. Is it possible for the child to have already finished? */
 	__rpc_sleep_on(&childq, task, func, NULL);
-	rpc_schedule_run(child);
+	rpc_make_runnable(child);
 	spin_unlock_bh(&childq.lock);
 }
 
diff -upr linux-2.6.16.46-0.12.orig/net/sunrpc/svcsock.c linux-2.6.16.46-0.12-027test011/net/sunrpc/svcsock.c
--- linux-2.6.16.46-0.12.orig/net/sunrpc/svcsock.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/sunrpc/svcsock.c	2007-08-28 17:35:33.000000000 +0400
@@ -361,6 +361,9 @@ svc_sendto(struct svc_rqst *rqstp, struc
 	size_t		base = xdr->page_base;
 	unsigned int	pglen = xdr->page_len;
 	unsigned int	flags = MSG_MORE;
+	struct ve_struct *old_env;
+
+	old_env = set_exec_env(sock->sk->owner_env);
 
 	slen = xdr->len;
 
@@ -425,6 +428,8 @@ out:
 			rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len,
 		rqstp->rq_addr.sin_addr.s_addr);
 
+	(void)set_exec_env(old_env);
+
 	return len;
 }
 
@@ -437,9 +442,12 @@ svc_recv_available(struct svc_sock *svsk
 	mm_segment_t	oldfs;
 	struct socket	*sock = svsk->sk_sock;
 	int		avail, err;
+	struct ve_struct *old_env;
 
 	oldfs = get_fs(); set_fs(KERNEL_DS);
+	old_env = set_exec_env(sock->sk->owner_env);
 	err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
+	(void)set_exec_env(old_env);
 	set_fs(oldfs);
 
 	return (err >= 0)? avail : err;
@@ -454,6 +462,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str
 	struct msghdr	msg;
 	struct socket	*sock;
 	int		len, alen;
+	struct ve_struct *old_env;
 
 	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
 	sock = rqstp->rq_sock->sk_sock;
@@ -465,7 +474,9 @@ svc_recvfrom(struct svc_rqst *rqstp, str
 
 	msg.msg_flags	= MSG_DONTWAIT;
 
+	old_env = set_exec_env(sock->sk->owner_env);
 	len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
+	(void)set_exec_env(old_env);
 
 	/* sock_recvmsg doesn't fill in the name/namelen, so we must..
 	 * possibly we should cache this in the svc_sock structure
@@ -1443,6 +1454,8 @@ svc_delete_socket(struct svc_sock *svsk)
 	serv = svsk->sk_server;
 	sk = svsk->sk_sk;
 
+	/* XXX: serialization? */
+	sk->sk_user_data = NULL;
 	sk->sk_state_change = svsk->sk_ostate;
 	sk->sk_data_ready = svsk->sk_odata;
 	sk->sk_write_space = svsk->sk_owspace;
diff -upr linux-2.6.16.46-0.12.orig/net/sunrpc/sysctl.c linux-2.6.16.46-0.12-027test011/net/sunrpc/sysctl.c
--- linux-2.6.16.46-0.12.orig/net/sunrpc/sysctl.c	2007-08-24 19:28:11.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/sunrpc/sysctl.c	2007-08-28 17:35:33.000000000 +0400
@@ -126,6 +126,8 @@ static unsigned int min_slot_table_size 
 static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
 static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
 static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
+static int xprt_min_abort_timeout = RPC_MIN_ABORT_TIMEOUT;
+static int xprt_max_abort_timeout = RPC_MAX_ABORT_TIMEOUT;
 
 static ctl_table sunrpc_table[] = {
 	{
@@ -204,6 +206,17 @@ static ctl_table sunrpc_table[] = {
 		.extra1		= &xprt_min_resvport_limit,
 		.extra2		= &xprt_max_resvport_limit
 	},
+	{
+		.ctl_name	= CTL_ABORT_TIMEOUT,
+		.procname	= "abort_timeout",
+		.data		= &xprt_abort_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &xprt_min_abort_timeout,
+		.extra2		= &xprt_max_abort_timeout
+	},
 	{ .ctl_name = 0 }
 };
 
diff -upr linux-2.6.16.46-0.12.orig/net/sunrpc/xprt.c linux-2.6.16.46-0.12-027test011/net/sunrpc/xprt.c
--- linux-2.6.16.46-0.12.orig/net/sunrpc/xprt.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/sunrpc/xprt.c	2007-08-28 17:35:33.000000000 +0400
@@ -482,10 +482,13 @@ int xprt_adjust_timeout(struct rpc_rqst 
 static void xprt_autoclose(void *args)
 {
 	struct rpc_xprt *xprt = (struct rpc_xprt *)args;
+	struct ve_struct *ve;
 
+	ve = set_exec_env(xprt->owner_env);
 	xprt_disconnect(xprt);
 	xprt->ops->close(xprt);
 	xprt_release_write(xprt, NULL);
+	(void)set_exec_env(ve);
 }
 
 /**
@@ -886,6 +889,7 @@ static struct rpc_xprt *xprt_setup(int p
 	memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */
 
 	xprt->addr = *ap;
+	xprt->owner_env = get_ve(get_exec_env());
 
 	switch (proto) {
 	case IPPROTO_UDP:
@@ -901,6 +905,7 @@ static struct rpc_xprt *xprt_setup(int p
 		break;
 	}
 	if (result) {
+		put_ve(xprt->owner_env);
 		kfree(xprt);
 		return ERR_PTR(result);
 	}
@@ -964,6 +969,7 @@ int xprt_destroy(struct rpc_xprt *xprt)
 	xprt->shutdown = 1;
 	del_timer_sync(&xprt->timer);
 	xprt->ops->destroy(xprt);
+	put_ve(xprt->owner_env);
 	kfree(xprt);
 
 	return 0;
diff -upr linux-2.6.16.46-0.12.orig/net/sunrpc/xprtsock.c linux-2.6.16.46-0.12-027test011/net/sunrpc/xprtsock.c
--- linux-2.6.16.46-0.12.orig/net/sunrpc/xprtsock.c	2007-08-24 19:28:27.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/net/sunrpc/xprtsock.c	2007-08-28 17:35:33.000000000 +0400
@@ -1007,7 +1007,12 @@ static void xs_udp_connect_worker(void *
 	struct rpc_xprt *xprt = (struct rpc_xprt *) args;
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
+	struct ve_struct *ve;	/* previous exec env, restored at exit */
 
+	ve = set_exec_env(xprt->owner_env);
+	down_read(&xprt->owner_env->op_sem);
+	if (!xprt->owner_env->is_running)
+		goto out;
 	if (xprt->shutdown || xprt->addr.sin_port == 0)
 		goto out;
 
@@ -1053,6 +1058,8 @@ static void xs_udp_connect_worker(void *
 out:
 	xprt_wake_pending_tasks(xprt, status);
 	xprt_clear_connecting(xprt);
+	up_read(&xprt->owner_env->op_sem);
+	(void)set_exec_env(ve);
 }
 
 /*
@@ -1090,7 +1097,12 @@ static void xs_tcp_connect_worker(void *
 	struct rpc_xprt *xprt = (struct rpc_xprt *)args;
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
+	struct ve_struct *ve;	/* previous exec env, restored at exit */
 
+	ve = set_exec_env(xprt->owner_env);
+	down_read(&xprt->owner_env->op_sem);
+	if (!xprt->owner_env->is_running)
+		goto out;
 	if (xprt->shutdown || xprt->addr.sin_port == 0)
 		goto out;
 
@@ -1164,6 +1176,8 @@ out:
 	xprt_wake_pending_tasks(xprt, status);
 out_clear:
 	xprt_clear_connecting(xprt);
+	up_read(&xprt->owner_env->op_sem);
+	(void)set_exec_env(ve);
 }
 
 /**
diff -upr linux-2.6.16.46-0.12.orig/net/unix/af_unix.c linux-2.6.16.46-0.12-027test011/net/unix/af_unix.c
--- linux-2.6.16.46-0.12.orig/net/unix/af_unix.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/unix/af_unix.c	2007-08-28 17:35:33.000000000 +0400
@@ -118,6 +118,9 @@
 #include <net/checksum.h>
 #include <linux/security.h>
 
+#include <ub/ub_net.h>
+#include <ub/beancounter.h>
+
 int sysctl_unix_max_dgram_qlen = 10;
 
 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
@@ -235,6 +238,8 @@ static struct sock *__unix_find_socket_b
 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 		struct unix_sock *u = unix_sk(s);
 
+		if (!ve_accessible(s->owner_env, get_exec_env()))
+			continue;
 		if (u->addr->len == len &&
 		    !memcmp(u->addr->name, sunname, len))
 			goto found;
@@ -439,7 +444,7 @@ static int unix_listen(struct socket *so
 	sk->sk_max_ack_backlog	= backlog;
 	sk->sk_state		= TCP_LISTEN;
 	/* set credentials so connect can copy them */
-	sk->sk_peercred.pid	= current->tgid;
+	sk->sk_peercred.pid	= virt_tgid(current);
 	sk->sk_peercred.uid	= current->euid;
 	sk->sk_peercred.gid	= current->egid;
 	err = 0;
@@ -553,6 +558,8 @@ static struct sock * unix_create1(struct
 	sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
 	if (!sk)
 		goto out;
+	if (ub_other_sock_charge(sk))
+		goto out_sk_free;
 
 	atomic_inc(&unix_nr_socks);
 
@@ -571,6 +578,9 @@ static struct sock * unix_create1(struct
 	unix_insert_socket(unix_sockets_unbound, sk);
 out:
 	return sk;
+out_sk_free:
+	sk_free(sk);
+	return NULL;
 }
 
 static int unix_create(struct socket *sock, int protocol)
@@ -932,6 +942,7 @@ static int unix_stream_connect(struct so
 	int st;
 	int err;
 	long timeo;
+	unsigned long chargesize;
 
 	err = unix_mkname(sunaddr, addr_len, &hash);
 	if (err < 0)
@@ -960,6 +971,10 @@ static int unix_stream_connect(struct so
 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
 	if (skb == NULL)
 		goto out;
+	chargesize = skb_charge_fullsize(skb);
+	if (ub_sock_getwres_other(newsk, chargesize) < 0)
+		goto out;
+	ub_skb_set_charge(skb, newsk, chargesize, UB_OTHERSOCKBUF);
 
 restart:
 	/*  Find listening sock. */
@@ -1043,7 +1058,7 @@ restart:
 	unix_peer(newsk)	= sk;
 	newsk->sk_state		= TCP_ESTABLISHED;
 	newsk->sk_type		= sk->sk_type;
-	newsk->sk_peercred.pid	= current->tgid;
+	newsk->sk_peercred.pid	= virt_tgid(current);
 	newsk->sk_peercred.uid	= current->euid;
 	newsk->sk_peercred.gid	= current->egid;
 	newu = unix_sk(newsk);
@@ -1107,7 +1122,7 @@ static int unix_socketpair(struct socket
 	sock_hold(skb);
 	unix_peer(ska)=skb;
 	unix_peer(skb)=ska;
-	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
+	ska->sk_peercred.pid = skb->sk_peercred.pid = virt_tgid(current);
 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
 
@@ -1211,7 +1226,7 @@ static void unix_detach_fds(struct scm_c
 		unix_notinflight(scm->fp->fp[i]);
 }
 
-static void unix_destruct_fds(struct sk_buff *skb)
+void unix_destruct_fds(struct sk_buff *skb)
 {
 	struct scm_cookie scm;
 	memset(&scm, 0, sizeof(scm));
@@ -1222,6 +1237,7 @@ static void unix_destruct_fds(struct sk_
 	scm_destroy(&scm);
 	sock_wfree(skb);
 }
+EXPORT_SYMBOL_GPL(unix_destruct_fds);
 
 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
@@ -1433,6 +1449,16 @@ static int unix_stream_sendmsg(struct ki
 
 		size=len-sent;
 
+		if (msg->msg_flags & MSG_DONTWAIT)
+			ub_sock_makewres_other(sk, skb_charge_size(size));
+		if (sock_bc(sk) != NULL &&
+				sock_bc(sk)->poll_reserv >=
+					SOCK_MIN_UBCSPACE &&
+				skb_charge_size(size) >
+					sock_bc(sk)->poll_reserv)
+			size = skb_charge_datalen(sock_bc(sk)->poll_reserv);
+
+
 		/* Keep two messages in the pipe so it schedules better */
 		if (size > sk->sk_sndbuf / 2 - 64)
 			size = sk->sk_sndbuf / 2 - 64;
@@ -1444,7 +1470,8 @@ static int unix_stream_sendmsg(struct ki
 		 *	Grab a buffer
 		 */
 		 
-		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
+		skb = sock_alloc_send_skb2(sk, size, SOCK_MIN_UBCSPACE,
+				msg->msg_flags&MSG_DONTWAIT, &err);
 
 		if (skb==NULL)
 			goto out_err;
@@ -1869,6 +1896,7 @@ static unsigned int unix_poll(struct fil
 {
 	struct sock *sk = sock->sk;
 	unsigned int mask;
+	int no_ub_res;
 
 	poll_wait(file, sk->sk_sleep, wait);
 	mask = 0;
@@ -1879,6 +1907,10 @@ static unsigned int unix_poll(struct fil
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= POLLHUP;
 
+	no_ub_res = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
+	if (no_ub_res)
+		ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
+
 	/* readable? */
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
 	    (sk->sk_shutdown & RCV_SHUTDOWN))
@@ -1892,7 +1924,7 @@ static unsigned int unix_poll(struct fil
 	 * we set writable also when the other side has shut down the
 	 * connection. This prevents stuck sockets.
 	 */
-	if (unix_writable(sk))
+	if (!no_ub_res && unix_writable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 
 	return mask;
@@ -2044,7 +2076,7 @@ static int __init af_unix_init(void)
 
 	sock_register(&unix_family_ops);
 #ifdef CONFIG_PROC_FS
-	proc_net_fops_create("unix", 0, &unix_seq_fops);
+	proc_glob_fops_create("net/unix", 0, &unix_seq_fops);
 #endif
 	unix_sysctl_register();
 out:
@@ -2055,7 +2087,7 @@ static void __exit af_unix_exit(void)
 {
 	sock_unregister(PF_UNIX);
 	unix_sysctl_unregister();
-	proc_net_remove("unix");
+	remove_proc_glob_entry("net/unix", NULL);
 	proto_unregister(&unix_proto);
 }
 
diff -upr linux-2.6.16.46-0.12.orig/net/unix/garbage.c linux-2.6.16.46-0.12-027test011/net/unix/garbage.c
--- linux-2.6.16.46-0.12.orig/net/unix/garbage.c	2006-03-20 08:53:29.000000000 +0300
+++ linux-2.6.16.46-0.12-027test011/net/unix/garbage.c	2007-08-28 17:35:33.000000000 +0400
@@ -76,6 +76,7 @@
 #include <linux/netdevice.h>
 #include <linux/file.h>
 #include <linux/proc_fs.h>
+#include <linux/module.h>
 
 #include <net/sock.h>
 #include <net/af_unix.h>
@@ -135,7 +136,7 @@ void unix_notinflight(struct file *fp)
 		atomic_dec(&unix_tot_inflight);
 	}
 }
-
+EXPORT_SYMBOL_GPL(unix_notinflight);
 
 /*
  *	Garbage Collector Support Functions
diff -upr linux-2.6.16.46-0.12.orig/scripts/mod/modpost.c linux-2.6.16.46-0.12-027test011/scripts/mod/modpost.c
--- linux-2.6.16.46-0.12.orig/scripts/mod/modpost.c	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/scripts/mod/modpost.c	2007-08-28 17:35:30.000000000 +0400
@@ -599,16 +599,20 @@ void add_supported_flag(struct buffer *b
 /**
  * Record CRCs for unresolved symbols
  **/
-static void add_versions(struct buffer *b, struct module *mod)
+static int add_versions(struct buffer *b, struct module *mod)
 {
 	struct symbol *s, *exp;
-
+	int err;
+
+	err = 0;
 	for (s = mod->unres; s; s = s->next) {
 		exp = find_symbol(s->name);
 		if (!exp || exp->module == mod) {
-			if (have_vmlinux && !s->weak)
+			if (have_vmlinux && !s->weak) {
 				warn("\"%s\" [%s.ko] undefined!\n",
 				     s->name, mod->name);
+				err = 1;
+			}
 			continue;
 		}
 		s->module = exp->module;
@@ -617,7 +621,7 @@ static void add_versions(struct buffer *
 	}
 
 	if (!modversions)
-		return;
+		return err;
 
 	buf_printf(b, "\n");
 	buf_printf(b, "static const struct modversion_info ____versions[]\n");
@@ -637,6 +641,7 @@ static void add_versions(struct buffer *
 	}
 
 	buf_printf(b, "};\n");
+	return err;
 }
 
 static void add_depends(struct buffer *b, struct module *mod,
@@ -819,7 +824,7 @@ int main(int argc, char **argv)
 	char *kernel_read = NULL, *module_read = NULL;
 	char *dump_write = NULL;
 	const char *supported = NULL;
-	int opt;
+	int opt, err;
 
 	while ((opt = getopt(argc, argv, "i:I:mo:as:")) != -1) {
 		switch(opt) {
@@ -858,6 +863,7 @@ int main(int argc, char **argv)
 		read_symbols(argv[optind++]);
 	}
 
+	err = 0;
 	for (mod = modules; mod; mod = mod->next) {
 		if (mod->skip)
 			continue;
@@ -866,7 +872,7 @@ int main(int argc, char **argv)
 
 		add_header(&buf, mod);
 		add_supported_flag(&buf, mod);
-		add_versions(&buf, mod);
+		err |= add_versions(&buf, mod);
 		add_depends(&buf, mod, modules);
 		add_moddevtable(&buf, mod);
 		add_srcversion(&buf, mod);
@@ -878,5 +884,5 @@ int main(int argc, char **argv)
 	if (dump_write)
 		write_dump(dump_write);
 
-	return 0;
+	return err;
 }
diff -upr linux-2.6.16.46-0.12.orig/security/Kconfig linux-2.6.16.46-0.12-027test011/security/Kconfig
--- linux-2.6.16.46-0.12.orig/security/Kconfig	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/security/Kconfig	2007-08-28 17:35:32.000000000 +0400
@@ -35,7 +35,7 @@ config KEYS_DEBUG_PROC_KEYS
 
 config SECURITY
 	bool "Enable different security models"
-	depends on SYSFS
+	depends on SYSFS && !VE
 	help
 	  This allows you to choose different security modules to be
 	  configured into your kernel.
diff -upr linux-2.6.16.46-0.12.orig/security/apparmor/main.c linux-2.6.16.46-0.12-027test011/security/apparmor/main.c
--- linux-2.6.16.46-0.12.orig/security/apparmor/main.c	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/security/apparmor/main.c	2007-08-28 17:35:34.000000000 +0400
@@ -821,7 +821,7 @@ char *sd_get_name(struct dentry *dentry,
 	}
 
 out:
-	return name;
+	return IS_ERR(name) ? NULL : name;
 }
 
 /***********************************
diff -upr linux-2.6.16.46-0.12.orig/security/commoncap.c linux-2.6.16.46-0.12-027test011/security/commoncap.c
--- linux-2.6.16.46-0.12.orig/security/commoncap.c	2007-08-24 19:28:24.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/security/commoncap.c	2007-08-28 17:35:36.000000000 +0400
@@ -35,7 +35,8 @@ EXPORT_SYMBOL(cap_netlink_send);
 
 int cap_netlink_recv(struct sk_buff *skb)
 {
-	if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
+	if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN) &&
+		!cap_raised(NETLINK_CB(skb).eff_cap, CAP_VE_NET_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -197,7 +198,7 @@ int cap_inode_setxattr(struct dentry *de
 {
 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
 		     sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -206,7 +207,7 @@ int cap_inode_removexattr(struct dentry 
 {
 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
 		     sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -312,7 +313,7 @@ void cap_task_reparent_to_init (struct t
 
 int cap_syslog (int type)
 {
-	if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
+	if ((type != 3 && type != 10) && !capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	return 0;
 }
diff -upr linux-2.6.16.46-0.12.orig/security/selinux/hooks.c linux-2.6.16.46-0.12-027test011/security/selinux/hooks.c
--- linux-2.6.16.46-0.12.orig/security/selinux/hooks.c	2007-08-24 19:28:26.000000000 +0400
+++ linux-2.6.16.46-0.12-027test011/security/selinux/hooks.c	2007-08-28 17:35:31.000000000 +0400
@@ -4152,12 +4152,12 @@ static int selinux_setprocattr(struct ta
 			struct task_struct *g, *t;
 			struct mm_struct *mm = p->mm;
 			read_lock(&tasklist_lock);
-			do_each_thread(g, t)
+			do_each_thread_ve(g, t)
 				if (t->mm == mm && t != p) {
 					read_unlock(&tasklist_lock);
 					return -EPERM;
 				}
-			while_each_thread(g, t);
+			while_each_thread_ve(g, t);
 			read_unlock(&tasklist_lock);
                 }
 
