summaryrefslogblamecommitdiffstats
path: root/scripts/namespace.pl
blob: 729c547fc9e1ef54400db1f06e5cf98c592bf290 (plain) (tree)
1
                   






























































                                                                               
             


               

                                                                 















                                                                                           
































                                             

                                 















                             





                             


  




















                                                                           

                                    









































                                                             
                             


















                                                                                


                                                             
                
                                                             


                                                                             


                                                            
                            
                                        








                                                                  

                                    




                                                                             


                                               
                   
                           












                                                                          
                                                                                              







                                                                              
                                
                                                         
                                                           









                                                                                                 
                                                      































                                                                    

                       
                           


                                               
























                                                                      
                                       

                                                       


                                                                          




                                                                            
                         
 
                                                                   
                                                             







                                                     

                                         
                    
                                            
                                              

                                                                         







                                                                                           
                                                                                  












                                                                                
                                                               









                                                                                     
















                                                                                   

                                       
                                            

                                                      








                                                                                    
                                                         

                                                            






                                                           



                         
#!/usr/bin/env perl
#
#	namespace.pl.  Mon Aug 30 2004
#
#	Perform a name space analysis on the linux kernel.
#
#	Copyright Keith Owens <kaos@ocs.com.au>.  GPL.
#
#	Invoke by changing directory to the top of the kernel object
#	tree then namespace.pl, no parameters.
#
#	Tuned for 2.1.x kernels with the new module handling, it will
#	work with 2.0 kernels as well.
#
#	Last change 2.6.9-rc1, adding support for separate source and object
#	trees.
#
#	The source must be compiled/assembled first, the object files
#	are the primary input to this script.  Incomplete or missing
#	objects will result in a flawed analysis.  Compile both vmlinux
#	and modules.
#
#	Even with complete objects, treat the result of the analysis
#	with caution.  Some external references are only used by
#	certain architectures, others with certain combinations of
#	configuration parameters.  Ideally the source should include
#	something like
#
#	#ifndef CONFIG_...
#	static
#	#endif
#	symbol_definition;
#
#	so the symbols are defined as static unless a particular
#	CONFIG_... requires it to be external.
#
#	A symbol that is suffixed with '(export only)' has these properties
#
#	* It is global.
#	* It is marked EXPORT_SYMBOL or EXPORT_SYMBOL_GPL, either in the same
#	  source file or a different source file.
#	* Given the current .config, nothing uses the symbol.
#
#	The symbol is a candidate for conversion to static, plus removal of the
#	export.  But be careful that a different .config might use the symbol.
#
#
#	Name space analysis and cleanup is an iterative process.  You cannot
#	expect to find all the problems in a single pass.
#
#	* Identify possibly unnecessary global declarations, verify that they
#	  really are unnecessary and change them to static.
#	* Compile and fix up gcc warnings about static, removing dead symbols
#	  as necessary.
#	* make clean and rebuild with different configs (especially
#	  CONFIG_MODULES=n) to see which symbols are being defined when the
#	  config does not require them.  These symbols bloat the kernel object
#	  for no good reason, which is frustrating for embedded systems.
#	* Wrap config sensitive symbols in #ifdef CONFIG_foo, as long as the
#	  code does not get too ugly.
#	* Repeat the name space analysis until you can live with the
#	  result.
#

use warnings;
use strict;
use File::Find;
use File::Spec;

# External tools.  NM and OBJDUMP may be overridden from the environment;
# the fixed options are appended to whichever command is chosen.
my $nm = ($ENV{'NM'} || "nm") . " -p";
my $objdump = ($ENV{'OBJDUMP'} || "objdump") . " -s -j .comment";

# Prefixes (each ending in '/') that do_nm puts in front of
# $File::Find::dir when it looks for the source file of an object.  That
# lookup runs after File::Find has changed into the directory being
# scanned, so a relative or empty prefix would be resolved against the
# wrong directory.  Make both prefixes absolute now, while the current
# directory is still the one the script was started in.  When srctree or
# objtree is not set in the environment, that start directory is used.
my $srctree = File::Spec->rel2abs(
	exists($ENV{'srctree'}) ? $ENV{'srctree'} : File::Spec->curdir()) . "/";
my $objtree = File::Spec->rel2abs(
	exists($ENV{'objtree'}) ? $ENV{'objtree'} : File::Spec->curdir()) . "/";

# The script takes no command line parameters.
if (@ARGV) {
	print STDERR "usage: $0 takes no parameters\n";
	die("giving up\n");
}

# Tables filled in by do_nm and consumed by the reporting passes:
#   %nmdata   object name => ref to the list of "type name" strings kept
#             from that object's nm output
#   %def      symbol name => ref to the list of objects that define it
#   %ksymtab  symbol name (without the __ksymtab_ prefix) => ref to the
#             list of objects that carry a __ksymtab_ entry for it
#   %ref      key exists if there is a true external reference to the name
#   %export   key exists if there is an EXPORT_... of the name
my (%nmdata, %def, %ksymtab, %ref, %export);

# Objects that are allowed to produce no nm data: do_nm consults this table
# before printing its "No nm data" message.  Keys are object paths relative
# to the top of the tree, written without the ".o" suffix.
# NOTE(review): the 'drivers/acpi/acpia/' entries may be a misspelling of
# 'acpica' -- confirm against the tree before relying on them.
my %nmexception = map { $_ => 1 } qw(
    fs/ext3/bitmap
    fs/ext4/bitmap
    arch/x86/lib/thunk_32
    arch/x86/lib/cmpxchg
    arch/x86/vdso/vdso32/note
    lib/irq_regs
    usr/initramfs_data
    drivers/scsi/aic94xx/aic94xx_dump
    drivers/scsi/libsas/sas_dump
    lib/dec_and_lock
    drivers/ide/ide-probe-mini
    drivers/acpi/acpia/exdump
    drivers/acpi/acpia/rsdump
    drivers/acpi/acpia/nsdumpdv
    drivers/acpi/acpia/nsdump
    arch/ia64/sn/kernel/sn2/io
    arch/ia64/kernel/gate-data
    security/capability
    fs/ntfs/sysctl
    fs/jfs/jfs_debug
);

# Symbol names for which resolve_external_references does not print a
# "Cannot resolve" message when no definition is found.
my %nameexception = map { $_ => 1 } qw(
    mod_use_count_
    __initramfs_end
    __initramfs_start
    _einittext
    _sinittext
    kallsyms_names
    kallsyms_num_syms
    kallsyms_addresses
    kallsyms_offsets
    kallsyms_relative_base
    __this_module
    _etext
    _edata
    _end
    __bss_start
    _text
    _stext
    __gp
    ia64_unw_start
    ia64_unw_end
    __init_begin
    __init_end
    __bss_stop
    __nosave_begin
    __nosave_end
    pg0
    vdso_enabled
    __stack_chk_fail
    VDSO32_PRELINK
    VDSO32_vsyscall
    VDSO32_rt_sigreturn
    VDSO32_sigreturn
);


# Walk the tree below the current directory; linux_objects selects the
# objects and hands each one to do_nm.  The three passes that follow print
# the report from the tables collected during the walk.
find({ wanted => \&linux_objects }, '.');
list_multiply_defined();
resolve_external_references();
list_extra_externals();

exit(0);

# File::Find callback: pass every interesting object file to do_nm.
#
# An object is interesting when its path (with a leading "./" removed) ends
# in ".o" and matches none of the exclusion patterns below.  The exclusions
# cover objects that are only created by merging other objects, everything
# under modules, scripts, tools and compressed, and a list of special
# cases: objects linked from just one other object and objects with no
# permanent source file.  Most conglomerate objects are recognised by
# do_nm itself and are not listed here.
sub linux_objects
{
	my @excluded = (
		qr:/built-in.o$:,
		qr:arch/x86/vdso/:,
		qr:arch/x86/boot/:,
		qr:arch/ia64/ia32/ia32.o$:,
		qr:arch/ia64/kernel/gate-syms.o$:,
		qr:arch/ia64/lib/__divdi3.o$:,
		qr:arch/ia64/lib/__divsi3.o$:,
		qr:arch/ia64/lib/__moddi3.o$:,
		qr:arch/ia64/lib/__modsi3.o$:,
		qr:arch/ia64/lib/__udivdi3.o$:,
		qr:arch/ia64/lib/__udivsi3.o$:,
		qr:arch/ia64/lib/__umoddi3.o$:,
		qr:arch/ia64/lib/__umodsi3.o$:,
		qr:arch/ia64/scripts/check_gas_for_hint.o$:,
		qr:arch/ia64/sn/kernel/xp.o$:,
		qr:boot/bbootsect.o$:,
		qr:boot/bsetup.o$:,
		qr:/bootsect.o$:,
		qr:/boot/setup.o$:,
		qr:/compressed/:,
		qr:drivers/cdrom/driver.o$:,
		qr:drivers/char/drm/tdfx_drv.o$:,
		qr:drivers/ide/ide-detect.o$:,
		qr:drivers/ide/pci/idedriver-pci.o$:,
		qr:drivers/media/media.o$:,
		qr:drivers/scsi/sd_mod.o$:,
		qr:drivers/video/video.o$:,
		qr:fs/devpts/devpts.o$:,
		qr:fs/exportfs/exportfs.o$:,
		qr:fs/hugetlbfs/hugetlbfs.o$:,
		qr:fs/msdos/msdos.o$:,
		qr:fs/nls/nls.o$:,
		qr:fs/ramfs/ramfs.o$:,
		qr:fs/romfs/romfs.o$:,
		qr:fs/vfat/vfat.o$:,
		qr:init/mounts.o$:,
		qr:^modules/:,
		qr:net/netlink/netlink.o$:,
		qr:net/sched/sched.o$:,
		qr:/piggy.o$:,
		qr:^scripts/:,
		qr:sound/.*/snd-:,
		qr:^.*/\.tmp_:,
		qr:^\.tmp_:,
		qr:/vmlinux-obj.o$:,
		qr:^tools/:,
	);

	my $basename = $_;
	(my $object = $File::Find::name) =~ s:^\./::;

	my $selected = ($object =~ /\.o$/) ? 1 : 0;
	if ($selected) {
		foreach my $pattern (@excluded) {
			if ($object =~ $pattern) {
				$selected = 0;
				last;
			}
		}
	}
	do_nm($basename, $object) if ($selected);

	# do_nm reads command output with while (<...>), which overwrites
	# the global $_; File::Find expects $_ untouched (undocumented).
	$_ = $basename;
}

# Run nm on one object file and record its symbols.
#
# Parameters:
#   $basename - name of the object file, tested and opened relative to the
#               current directory
#   $fullname - path of the same object as passed by linux_objects; used
#               as the key in the tables and in all messages
#
# Effects:
#   * Objects for which no .c or .S source is found are not analysed.
#     objdump is run on them and "No source file found" is reported on
#     STDERR unless the .comment section shows more than one "GCC:" entry
#     or the object is an archive (both are treated as conglomerates).
#   * Every kept nm line is stored as "type name" in @{$nmdata{$fullname}}.
#   * Global definitions are appended to @{$def{$name}}, except for the
#     module/initcall/export bookkeeping names filtered out below.
#   * __ksymtab_<name> entries are appended to @{$ksymtab{<name>}}.
#
# Dies if the nm or objdump pipe cannot be opened.
sub do_nm
{
	my ($basename, $fullname) = @_;
	my ($source, $type, $name);
	# Messages use print, not printf, so that a '%' in a file name or in
	# an nm line is never interpreted as a format directive.
	if (! -e $basename) {
		print STDERR "$basename does not exist\n";
		return;
	}
	if ($fullname !~ /\.o$/) {
		print STDERR "$fullname is not an object file\n";
		return;
	}
	($source = $basename) =~ s/\.o$//;
	# A source file next to the object means it lives in the object
	# tree, otherwise look for it in the source tree.
	if (-e "$source.c" || -e "$source.S") {
		$source = "$objtree$File::Find::dir/$source";
	} else {
		$source = "$srctree$File::Find::dir/$source";
	}
	if (! -e "$source.c" && ! -e "$source.S") {
		# No obvious source, exclude the object if it is conglomerate
		open(my $objdumpdata, "$objdump $basename|")
		    or die "$objdump $fullname failed $!\n";

		my $comment;
		while (<$objdumpdata>) {
			chomp();
			if (/^In archive/) {
				# Archives are always conglomerate
				$comment = "GCC:GCC:";
				last;
			}
			next if (! /^[ 0-9a-f]{5,} /);
			# Keep the text that starts at column 43 of the
			# dump line; shorter lines contribute nothing.
			$comment .= substr($_, 43) if (length($_) > 43);
		}
		close($objdumpdata);

		if (!defined($comment) || $comment !~ /GCC\:.*GCC\:/m) {
			print STDERR "No source file found for $fullname\n";
		}
		return;
	}
	open (my $nmdata, "$nm $basename|")
	    or die "$nm $fullname failed $!\n";

	my @nmdata;
	while (<$nmdata>) {
		# chomp removes only a line terminator; chop would eat the last
		# character of the symbol name on an unterminated final line.
		chomp;
		($type, $name) = (split(/ +/, $_, 3))[1..2];
		# Expected types
		# A absolute symbol
		# B weak external reference to data that has been resolved
		# C global variable, uninitialised
		# D global variable, initialised
		# G global variable, initialised, small data section
		# R global array, initialised
		# S global variable, uninitialised, small bss
		# T global label/procedure
		# U external reference
		# W weak external reference to text that has been resolved
		# V similar to W, but the value of the weak symbol becomes zero with no error.
		# a assembler equate
		# b static variable, uninitialised
		# d static variable, initialised
		# g static variable, initialised, small data section
		# r static array, initialised
		# s static variable, uninitialised, small bss
		# t static label/procedures
		# w weak external reference to text that has not been resolved
		# v similar to w
		# ? undefined type, used a lot by modules
		#
		# A line without a type or name field is reported the same way
		# as a line with an unexpected type.
		if (!defined($type) || !defined($name) ||
		    $type !~ /^[ABCDGRSTUWVabdgrstwv?]$/) {
			print STDERR "nm output for $fullname contains unknown type '$_'\n";
		}
		elsif ($name =~ /\./) {
			# name with '.' is local static
		}
		else {
			$type = 'R' if ($type eq '?');	# binutils replaced ? with R at one point
			# binutils keeps changing the type for exported symbols, force it to R
			$type = 'R' if ($name =~ /^__ksymtab/ || $name =~ /^__kstrtab/);
			$name =~ s/_R[a-f0-9]{8}$//;	# module versions adds this
			if ($type =~ /[ABCDGRSTWV]/ &&
				$name ne 'init_module' &&
				$name ne 'cleanup_module' &&
				$name ne 'Using_Versions' &&
				$name !~ /^Version_[0-9]+$/ &&
				$name !~ /^__parm_/ &&
				$name !~ /^__kstrtab/ &&
				$name !~ /^__ksymtab/ &&
				$name !~ /^__kcrctab_/ &&
				$name !~ /^__exitcall_/ &&
				$name !~ /^__initcall_/ &&
				$name !~ /^__kdb_initcall_/ &&
				$name !~ /^__kdb_exitcall_/ &&
				$name !~ /^__module_/ &&
				$name !~ /^__mod_/ &&
				$name !~ /^__crc_/ &&
				$name ne '__this_module' &&
				$name ne 'kernel_version') {
				if (!exists($def{$name})) {
					$def{$name} = [];
				}
				push(@{$def{$name}}, $fullname);
			}
			push(@nmdata, "$type $name");
			if ($name =~ /^__ksymtab_/) {
				# strip the 10 character "__ksymtab_" prefix
				$name = substr($name, 10);
				if (!exists($ksymtab{$name})) {
					$ksymtab{$name} = [];
				}
				push(@{$ksymtab{$name}}, $fullname);
			}
		}
	}
	close($nmdata);

	if ($#nmdata < 0) {
		# The keys of %nmexception carry no ".o" suffix while $fullname
		# always ends in ".o", so look the object up without it.  The
		# unstripped name is still accepted.
		(my $object = $fullname) =~ s/\.o$//;
		print "No nm data for $fullname\n"
		    unless ($nmexception{$object} || $nmexception{$fullname});
		return;
	}
	$nmdata{$fullname} = \@nmdata;
}

# Forget that $object defines $name.
#
# Parameters:
#   $object - object name, a key of %nmdata
#   $name   - symbol name
#
# The first entry for $name is removed from @{$nmdata{$object}}.  When such
# an entry was found, every occurrence of $object is also removed from
# @{$def{$name}}, including one in the last position of the list.  Nothing
# happens when the object has no nm data or no entry for the name.
sub drop_def
{
	my ($object, $name) = @_;
	my $entries = $nmdata{$object};
	return unless (defined($entries));

	foreach my $i (0 .. $#{$entries}) {
		# entries start with "type name"; field 1 is the symbol name
		my $symbol = (split(' ', $entries->[$i], 2))[1];
		next unless (defined($symbol) && $symbol eq $name);

		splice(@{$entries}, $i, 1);
		if (exists($def{$name})) {
			@{$def{$name}} = grep { $_ ne $object } @{$def{$name}};
		}
		last;
	}
}

# Report every symbol in %def that is defined by more than one object.
#
# Symbols are reported in sorted order so that two runs over the same tree
# print the same text.  A sys_*, compat_sys_* or sys32_* symbol with exactly
# two definitions, one of them in kernel/sys_ni.o, is not reported (the
# cond_syscall case); the kernel/sys_ni.o definition is dropped instead.
#
# Output goes to STDOUT through print, so a '%' in a symbol or object name
# is printed as is and never treated as a format directive.
sub list_multiply_defined
{
	foreach my $name (sort(keys(%def))) {
		my @objects = @{$def{$name}};
		next if (@objects < 2);

		# Special case for cond_syscall
		if (@objects == 2 &&
		    ($name =~ /^sys_/ || $name =~ /^compat_sys_/ ||
		     $name =~ /^sys32_/) &&
		    ($objects[0] eq "kernel/sys_ni.o" ||
		     $objects[1] eq "kernel/sys_ni.o")) {
			drop_def("kernel/sys_ni.o", $name);
			next;
		}

		print "$name is multiply defined in :-\n";
		foreach my $module (@objects) {
			print "\t$module\n";
		}
	}
}

# Match the undefined symbols of every object against the known definitions.
#
# For each "U" or "w" entry in %nmdata:
#   * If the name has a definition in %def or an entry in %ksymtab, the
#     object name is appended to the nmdata entry.  The name is recorded in
#     %export when the referencing object itself carries a __kstrtab_ or
#     __ksymtab_ entry for it, otherwise in %ref as a true reference.
#   * Otherwise "Cannot resolve ..." is printed, unless the name is listed
#     in %nameexception or matches one of the patterns below.
#
# Objects are processed in sorted order so that the messages come out in
# the same order on every run.  Output goes to STDOUT through print, so a
# '%' in a name is never treated as a format directive.
sub resolve_external_references
{
	print "\n";
	foreach my $object (sort(keys(%nmdata))) {
		my $nmdata = $nmdata{$object};
		# Index the entries once per object instead of rescanning the
		# whole list for every undefined symbol.  Only "U"/"w" entries
		# are rewritten below and the lookups are for "R ..." entries,
		# so the index stays valid for the whole object.
		my %entry = map { $_ => 1 } @{$nmdata};

		foreach my $i (0 .. $#{$nmdata}) {
			my ($type, $name) = split(' ', $nmdata->[$i], 2);
			next unless ($type eq "U" || $type eq "w");

			if (exists($def{$name}) || exists($ksymtab{$name})) {
				# add the referencing object to the nmdata entry
				$nmdata->[$i] = "$type $name $object";
				# only count as a reference if it is not EXPORT_...
				if ($entry{"R __kstrtab_$name"} ||
				    $entry{"R __ksymtab_$name"}) {
					$export{$name} = "";
				}
				else {
					$ref{$name} = "";
				}
			}
			elsif ( ! $nameexception{$name}
				&& $name !~ /^__sched_text_/
				&& $name !~ /^__start_/
				&& $name !~ /^__end_/
				&& $name !~ /^__stop_/
				&& $name !~ /^__scheduling_functions_.*_here/
				&& $name !~ /^__.*initcall_/
				&& $name !~ /^__.*per_cpu_start/
				&& $name !~ /^__.*per_cpu_end/
				&& $name !~ /^__alt_instructions/
				&& $name !~ /^__setup_/
				&& $name !~ /^__mod_timer/
				&& $name !~ /^__mod_page_state/
				&& $name !~ /^init_module/
				&& $name !~ /^cleanup_module/
			) {
				print "Cannot resolve ";
				print "weak " if ($type eq "w");
				print "reference to $name from $object\n";
			}
		}
	}
}

# Report globally defined symbols that nothing references.
#
# Every name in %def without a key in %ref is listed under each object that
# defines it; objects and names are printed in sorted order.  A name that
# has a key in %export gets the suffix " (export only)".  Nothing is printed
# when every definition is referenced.
#
# Output goes to STDOUT through print, so a '%' in a symbol or object name
# is printed as is and never treated as a format directive.
sub list_extra_externals
{
	my %noref = ();

	# group the unreferenced names by defining object
	foreach my $name (keys(%def)) {
		next if (exists($ref{$name}));
		foreach my $module (@{$def{$name}}) {
			push(@{$noref{$module}}, $name);
		}
	}
	return unless (%noref);

	print "\nExternally defined symbols with no external references\n";
	foreach my $module (sort(keys(%noref))) {
		print "  $module\n";
		foreach my $name (sort(@{$noref{$module}})) {
			my $export = exists($export{$name}) ? " (export only)" : "";
			print "    $name$export\n";
		}
	}
}
class="hl com">/* return end-of-log address */ return lsn; } /* * NAME: lmWriteRecord() * * FUNCTION: move the log record to current log page * * PARAMETER: cd - commit descriptor * * RETURN: end-of-log address * * serialization: LOG_LOCK() held on entry/exit */ static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) { int lsn = 0; /* end-of-log address */ struct lbuf *bp; /* dst log page buffer */ struct logpage *lp; /* dst log page */ caddr_t dst; /* destination address in log page */ int dstoffset; /* end-of-log offset in log page */ int freespace; /* free space in log page */ caddr_t p; /* src meta-data page */ caddr_t src; int srclen; int nbytes; /* number of bytes to move */ int i; int len; struct linelock *linelock; struct lv *lv; struct lvd *lvd; int l2linesize; len = 0; /* retrieve destination log page to write */ bp = (struct lbuf *) log->bp; lp = (struct logpage *) bp->l_ldata; dstoffset = log->eor; /* any log data to write ? */ if (tlck == NULL) goto moveLrd; /* * move log record data */ /* retrieve source meta-data page to log */ if (tlck->flag & tlckPAGELOCK) { p = (caddr_t) (tlck->mp->data); linelock = (struct linelock *) & tlck->lock; } /* retrieve source in-memory inode to log */ else if (tlck->flag & tlckINODELOCK) { if (tlck->type & tlckDTREE) p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot; else p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot; linelock = (struct linelock *) & tlck->lock; } #ifdef _JFS_WIP else if (tlck->flag & tlckINLINELOCK) { inlinelock = (struct inlinelock *) & tlck; p = (caddr_t) & inlinelock->pxd; linelock = (struct linelock *) & tlck; } #endif /* _JFS_WIP */ else { jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck); return 0; /* Probably should trap */ } l2linesize = linelock->l2linesize; moveData: ASSERT(linelock->index <= linelock->maxcnt); lv = linelock->lv; for (i = 0; i < linelock->index; i++, lv++) { if (lv->length == 0) continue; /* is page full ? 
*/ if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) { /* page become full: move on to next page */ lmNextPage(log); bp = log->bp; lp = (struct logpage *) bp->l_ldata; dstoffset = LOGPHDRSIZE; } /* * move log vector data */ src = (u8 *) p + (lv->offset << l2linesize); srclen = lv->length << l2linesize; len += srclen; while (srclen > 0) { freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; nbytes = min(freespace, srclen); dst = (caddr_t) lp + dstoffset; memcpy(dst, src, nbytes); dstoffset += nbytes; /* is page not full ? */ if (dstoffset < LOGPSIZE - LOGPTLRSIZE) break; /* page become full: move on to next page */ lmNextPage(log); bp = (struct lbuf *) log->bp; lp = (struct logpage *) bp->l_ldata; dstoffset = LOGPHDRSIZE; srclen -= nbytes; src += nbytes; } /* * move log vector descriptor */ len += 4; lvd = (struct lvd *) ((caddr_t) lp + dstoffset); lvd->offset = cpu_to_le16(lv->offset); lvd->length = cpu_to_le16(lv->length); dstoffset += 4; jfs_info("lmWriteRecord: lv offset:%d length:%d", lv->offset, lv->length); } if ((i = linelock->next)) { linelock = (struct linelock *) lid_to_tlock(i); goto moveData; } /* * move log record descriptor */ moveLrd: lrd->length = cpu_to_le16(len); src = (caddr_t) lrd; srclen = LOGRDSIZE; while (srclen > 0) { freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; nbytes = min(freespace, srclen); dst = (caddr_t) lp + dstoffset; memcpy(dst, src, nbytes); dstoffset += nbytes; srclen -= nbytes; /* are there more to move than freespace of page ? 
*/ if (srclen) goto pageFull; /* * end of log record descriptor */ /* update last log record eor */ log->eor = dstoffset; bp->l_eor = dstoffset; lsn = (log->page << L2LOGPSIZE) + dstoffset; if (lrd->type & cpu_to_le16(LOG_COMMIT)) { tblk->clsn = lsn; jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn, bp->l_eor); INCREMENT(lmStat.commit); /* # of commit */ /* * enqueue tblock for group commit: * * enqueue tblock of non-trivial/synchronous COMMIT * at tail of group commit queue * (trivial/asynchronous COMMITs are ignored by * group commit.) */ LOGGC_LOCK(log); /* init tblock gc state */ tblk->flag = tblkGC_QUEUE; tblk->bp = log->bp; tblk->pn = log->page; tblk->eor = log->eor; /* enqueue transaction to commit queue */ list_add_tail(&tblk->cqueue, &log->cqueue); LOGGC_UNLOCK(log); } jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x", le16_to_cpu(lrd->type), log->bp, log->page, dstoffset); /* page not full ? */ if (dstoffset < LOGPSIZE - LOGPTLRSIZE) return lsn; pageFull: /* page become full: move on to next page */ lmNextPage(log); bp = (struct lbuf *) log->bp; lp = (struct logpage *) bp->l_ldata; dstoffset = LOGPHDRSIZE; src += nbytes; } return lsn; } /* * NAME: lmNextPage() * * FUNCTION: write current page and allocate next page. 
* * PARAMETER: log * * RETURN: 0 * * serialization: LOG_LOCK() held on entry/exit */ static int lmNextPage(struct jfs_log * log) { struct logpage *lp; int lspn; /* log sequence page number */ int pn; /* current page number */ struct lbuf *bp; struct lbuf *nextbp; struct tblock *tblk; /* get current log page number and log sequence page number */ pn = log->page; bp = log->bp; lp = (struct logpage *) bp->l_ldata; lspn = le32_to_cpu(lp->h.page); LOGGC_LOCK(log); /* * write or queue the full page at the tail of write queue */ /* get the tail tblk on commit queue */ if (list_empty(&log->cqueue)) tblk = NULL; else tblk = list_entry(log->cqueue.prev, struct tblock, cqueue); /* every tblk who has COMMIT record on the current page, * and has not been committed, must be on commit queue * since tblk is queued at commit queueu at the time * of writing its COMMIT record on the page before * page becomes full (even though the tblk thread * who wrote COMMIT record may have been suspended * currently); */ /* is page bound with outstanding tail tblk ? */ if (tblk && tblk->pn == pn) { /* mark tblk for end-of-page */ tblk->flag |= tblkGC_EOP; if (log->cflag & logGC_PAGEOUT) { /* if page is not already on write queue, * just enqueue (no lbmWRITE to prevent redrive) * buffer to wqueue to ensure correct serial order * of the pages since log pages will be added * continuously */ if (bp->l_wqnext == NULL) lbmWrite(log, bp, 0, 0); } else { /* * No current GC leader, initiate group commit */ log->cflag |= logGC_PAGEOUT; lmGCwrite(log, 0); } } /* page is not bound with outstanding tblk: * init write or mark it to be redriven (lbmWRITE) */ else { /* finalize the page */ bp->l_ceor = bp->l_eor; lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0); } LOGGC_UNLOCK(log); /* * allocate/initialize next page */ /* if log wraps, the first data page of log is 2 * (0 never used, 1 is superblock). */ log->page = (pn == log->size - 1) ? 
2 : pn + 1; log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */ /* allocate/initialize next log page buffer */ nextbp = lbmAllocate(log, log->page); nextbp->l_eor = log->eor; log->bp = nextbp; /* initialize next log page */ lp = (struct logpage *) nextbp->l_ldata; lp->h.page = lp->t.page = cpu_to_le32(lspn + 1); lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); return 0; } /* * NAME: lmGroupCommit() * * FUNCTION: group commit * initiate pageout of the pages with COMMIT in the order of * page number - redrive pageout of the page at the head of * pageout queue until full page has been written. * * RETURN: * * NOTE: * LOGGC_LOCK serializes log group commit queue, and * transaction blocks on the commit queue. * N.B. LOG_LOCK is NOT held during lmGroupCommit(). */ int lmGroupCommit(struct jfs_log * log, struct tblock * tblk) { int rc = 0; LOGGC_LOCK(log); /* group committed already ? */ if (tblk->flag & tblkGC_COMMITTED) { if (tblk->flag & tblkGC_ERROR) rc = -EIO; LOGGC_UNLOCK(log); return rc; } jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc); if (tblk->xflag & COMMIT_LAZY) tblk->flag |= tblkGC_LAZY; if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) && (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low)) { /* * No pageout in progress * * start group commit as its group leader. 
*/ log->cflag |= logGC_PAGEOUT; lmGCwrite(log, 0); } if (tblk->xflag & COMMIT_LAZY) { /* * Lazy transactions can leave now */ LOGGC_UNLOCK(log); return 0; } /* lmGCwrite gives up LOGGC_LOCK, check again */ if (tblk->flag & tblkGC_COMMITTED) { if (tblk->flag & tblkGC_ERROR) rc = -EIO; LOGGC_UNLOCK(log); return rc; } /* upcount transaction waiting for completion */ log->gcrtc++; tblk->flag |= tblkGC_READY; __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED), LOGGC_LOCK(log), LOGGC_UNLOCK(log)); /* removed from commit queue */ if (tblk->flag & tblkGC_ERROR) rc = -EIO; LOGGC_UNLOCK(log); return rc; } /* * NAME: lmGCwrite() * * FUNCTION: group commit write * initiate write of log page, building a group of all transactions * with commit records on that page. * * RETURN: None * * NOTE: * LOGGC_LOCK must be held by caller. * N.B. LOG_LOCK is NOT held during lmGroupCommit(). */ static void lmGCwrite(struct jfs_log * log, int cant_write) { struct lbuf *bp; struct logpage *lp; int gcpn; /* group commit page number */ struct tblock *tblk; struct tblock *xtblk = NULL; /* * build the commit group of a log page * * scan commit queue and make a commit group of all * transactions with COMMIT records on the same log page. */ /* get the head tblk on the commit queue */ gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn; list_for_each_entry(tblk, &log->cqueue, cqueue) { if (tblk->pn != gcpn) break; xtblk = tblk; /* state transition: (QUEUE, READY) -> COMMIT */ tblk->flag |= tblkGC_COMMIT; } tblk = xtblk; /* last tblk of the page */ /* * pageout to commit transactions on the log page. */ bp = (struct lbuf *) tblk->bp; lp = (struct logpage *) bp->l_ldata; /* is page already full ? 
*/ if (tblk->flag & tblkGC_EOP) { /* mark page to free at end of group commit of the page */ tblk->flag &= ~tblkGC_EOP; tblk->flag |= tblkGC_FREE; bp->l_ceor = bp->l_eor; lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC, cant_write); INCREMENT(lmStat.full_page); } /* page is not yet full */ else { bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */ lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write); INCREMENT(lmStat.partial_page); } } /* * NAME: lmPostGC() * * FUNCTION: group commit post-processing * Processes transactions after their commit records have been written * to disk, redriving log I/O if necessary. * * RETURN: None * * NOTE: * This routine is called a interrupt time by lbmIODone */ static void lmPostGC(struct lbuf * bp) { unsigned long flags; struct jfs_log *log = bp->l_log; struct logpage *lp; struct tblock *tblk, *temp; //LOGGC_LOCK(log); spin_lock_irqsave(&log->gclock, flags); /* * current pageout of group commit completed. * * remove/wakeup transactions from commit queue who were * group committed with the current log page */ list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) { if (!(tblk->flag & tblkGC_COMMIT)) break; /* if transaction was marked GC_COMMIT then * it has been shipped in the current pageout * and made it to disk - it is committed. 
*/ if (bp->l_flag & lbmERROR) tblk->flag |= tblkGC_ERROR; /* remove it from the commit queue */ list_del(&tblk->cqueue); tblk->flag &= ~tblkGC_QUEUE; if (tblk == log->flush_tblk) { /* we can stop flushing the log now */ clear_bit(log_FLUSH, &log->flag); log->flush_tblk = NULL; } jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk, tblk->flag); if (!(tblk->xflag & COMMIT_FORCE)) /* * Hand tblk over to lazy commit thread */ txLazyUnlock(tblk); else { /* state transition: COMMIT -> COMMITTED */ tblk->flag |= tblkGC_COMMITTED; if (tblk->flag & tblkGC_READY) log->gcrtc--; LOGGC_WAKEUP(tblk); } /* was page full before pageout ? * (and this is the last tblk bound with the page) */ if (tblk->flag & tblkGC_FREE) lbmFree(bp); /* did page become full after pageout ? * (and this is the last tblk bound with the page) */ else if (tblk->flag & tblkGC_EOP) { /* finalize the page */ lp = (struct logpage *) bp->l_ldata; bp->l_ceor = bp->l_eor; lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); jfs_info("lmPostGC: calling lbmWrite"); lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 1); } } /* are there any transactions who have entered lnGroupCommit() * (whose COMMITs are after that of the last log page written. * They are waiting for new group commit (above at (SLEEP 1)) * or lazy transactions are on a full (queued) log page, * select the latest ready transaction as new group leader and * wake her up to lead her group. */ if ((!list_empty(&log->cqueue)) && ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) || test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low)) /* * Call lmGCwrite with new group leader */ lmGCwrite(log, 1); /* no transaction are ready yet (transactions are only just * queued (GC_QUEUE) and not entered for group commit yet). * the first transaction entering group commit * will elect herself as new group leader. 
*/ else log->cflag &= ~logGC_PAGEOUT; //LOGGC_UNLOCK(log); spin_unlock_irqrestore(&log->gclock, flags); return; } /* * NAME: lmLogSync() * * FUNCTION: write log SYNCPT record for specified log * if new sync address is available * (normally the case if sync() is executed by back-ground * process). * if not, explicitly run jfs_blogsync() to initiate * getting of new sync address. * calculate new value of i_nextsync which determines when * this code is called again. * * PARAMETERS: log - log structure * nosyncwait - 1 if called asynchronously * * RETURN: 0 * * serialization: LOG_LOCK() held on entry/exit */ static int lmLogSync(struct jfs_log * log, int nosyncwait) { int logsize; int written; /* written since last syncpt */ int free; /* free space left available */ int delta; /* additional delta to write normally */ int more; /* additional write granted */ struct lrd lrd; int lsn; struct logsyncblk *lp; struct jfs_sb_info *sbi; unsigned long flags; /* push dirty metapages out to disk */ list_for_each_entry(sbi, &log->sb_list, log_list) { filemap_flush(sbi->ipbmap->i_mapping); filemap_flush(sbi->ipimap->i_mapping); filemap_flush(sbi->direct_inode->i_mapping); } /* * forward syncpt */ /* if last sync is same as last syncpt, * invoke sync point forward processing to update sync. */ if (log->sync == log->syncpt) { LOGSYNC_LOCK(log, flags); if (list_empty(&log->synclist)) log->sync = log->lsn; else { lp = list_entry(log->synclist.next, struct logsyncblk, synclist); log->sync = lp->lsn; } LOGSYNC_UNLOCK(log, flags); } /* if sync is different from last syncpt, * write a SYNCPT record with syncpt = sync. 
* reset syncpt = sync */ if (log->sync != log->syncpt) { lrd.logtid = 0; lrd.backchain = 0; lrd.type = cpu_to_le16(LOG_SYNCPT); lrd.length = 0; lrd.log.syncpt.sync = cpu_to_le32(log->sync); lsn = lmWriteRecord(log, NULL, &lrd, NULL); log->syncpt = log->sync; } else lsn = log->lsn; /* * setup next syncpt trigger (SWAG) */ logsize = log->logsize; logdiff(written, lsn, log); free = logsize - written; delta = LOGSYNC_DELTA(logsize); more = min(free / 2, delta); if (more < 2 * LOGPSIZE) { jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); /* * log wrapping * * option 1 - panic ? No.! * option 2 - shutdown file systems * associated with log ? * option 3 - extend log ? */ /* * option 4 - second chance * * mark log wrapped, and continue. * when all active transactions are completed, * mark log vaild for recovery. * if crashed during invalid state, log state * implies invald log, forcing fsck(). */ /* mark log state log wrap in log superblock */ /* log->state = LOGWRAP; */ /* reset sync point computation */ log->syncpt = log->sync = lsn; log->nextsync = delta; } else /* next syncpt trigger = written + more */ log->nextsync = written + more; /* return if lmLogSync() from outside of transaction, e.g., sync() */ if (nosyncwait) return lsn; /* if number of bytes written from last sync point is more * than 1/4 of the log size, stop new transactions from * starting until all current transactions are completed * by setting syncbarrier flag. 
*/ if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) { set_bit(log_SYNCBARRIER, &log->flag); jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn, log->syncpt); /* * We may have to initiate group commit */ jfs_flush_journal(log, 0); } return lsn; } /* * NAME: jfs_syncpt * * FUNCTION: write log SYNCPT record for specified log * * PARAMETERS: log - log structure */ void jfs_syncpt(struct jfs_log *log) { LOG_LOCK(log); lmLogSync(log, 1); LOG_UNLOCK(log); } /* * NAME: lmLogOpen() * * FUNCTION: open the log on first open; * insert filesystem in the active list of the log. * * PARAMETER: ipmnt - file system mount inode * iplog - log inode (out) * * RETURN: * * serialization: */ int lmLogOpen(struct super_block *sb) { int rc; struct block_device *bdev; struct jfs_log *log; struct jfs_sb_info *sbi = JFS_SBI(sb); if (sbi->flag & JFS_NOINTEGRITY) return open_dummy_log(sb); if (sbi->mntflag & JFS_INLINELOG) return open_inline_log(sb); down(&jfs_log_sem); list_for_each_entry(log, &jfs_external_logs, journal_list) { if (log->bdev->bd_dev == sbi->logdev) { if (memcmp(log->uuid, sbi->loguuid, sizeof(log->uuid))) { jfs_warn("wrong uuid on JFS journal\n"); up(&jfs_log_sem); return -EINVAL; } /* * add file system to log active file system list */ if ((rc = lmLogFileSystem(log, sbi, 1))) { up(&jfs_log_sem); return rc; } goto journal_found; } } if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) { up(&jfs_log_sem); return -ENOMEM; } memset(log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&log->sb_list); init_waitqueue_head(&log->syncwait); /* * external log as separate logical volume * * file systems to log may have n-to-1 relationship; */ bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); if (IS_ERR(bdev)) { rc = -PTR_ERR(bdev); goto free; } if ((rc = bd_claim(bdev, log))) { goto close; } log->bdev = bdev; memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); /* * initialize log: */ if ((rc = lmLogInit(log))) goto unclaim; list_add(&log->journal_list, 
&jfs_external_logs); /* * add file system to log active file system list */ if ((rc = lmLogFileSystem(log, sbi, 1))) goto shutdown; journal_found: LOG_LOCK(log); list_add(&sbi->log_list, &log->sb_list); sbi->log = log; LOG_UNLOCK(log); up(&jfs_log_sem); return 0; /* * unwind on error */ shutdown: /* unwind lbmLogInit() */ list_del(&log->journal_list); lbmLogShutdown(log); unclaim: bd_release(bdev); close: /* close external log device */ blkdev_put(bdev); free: /* free log descriptor */ up(&jfs_log_sem); kfree(log); jfs_warn("lmLogOpen: exit(%d)", rc); return rc; } static int open_inline_log(struct super_block *sb) { struct jfs_log *log; int rc; if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) return -ENOMEM; memset(log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&log->sb_list); init_waitqueue_head(&log->syncwait); set_bit(log_INLINELOG, &log->flag); log->bdev = sb->s_bdev; log->base = addressPXD(&JFS_SBI(sb)->logpxd); log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >> (L2LOGPSIZE - sb->s_blocksize_bits); log->l2bsize = sb->s_blocksize_bits; ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits); /* * initialize log. 
*/ if ((rc = lmLogInit(log))) { kfree(log); jfs_warn("lmLogOpen: exit(%d)", rc); return rc; } list_add(&JFS_SBI(sb)->log_list, &log->sb_list); JFS_SBI(sb)->log = log; return rc; } static int open_dummy_log(struct super_block *sb) { int rc; down(&jfs_log_sem); if (!dummy_log) { dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL); if (!dummy_log) { up(&jfs_log_sem); return -ENOMEM; } memset(dummy_log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&dummy_log->sb_list); init_waitqueue_head(&dummy_log->syncwait); dummy_log->no_integrity = 1; /* Make up some stuff */ dummy_log->base = 0; dummy_log->size = 1024; rc = lmLogInit(dummy_log); if (rc) { kfree(dummy_log); dummy_log = NULL; up(&jfs_log_sem); return rc; } } LOG_LOCK(dummy_log); list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list); JFS_SBI(sb)->log = dummy_log; LOG_UNLOCK(dummy_log); up(&jfs_log_sem); return 0; } /* * NAME: lmLogInit() * * FUNCTION: log initialization at first log open. * * logredo() (or logformat()) should have been run previously. * initialize the log from log superblock. * set the log state in the superblock to LOGMOUNT and * write SYNCPT log record. 
* * PARAMETER: log - log structure * * RETURN: 0 - if ok * -EINVAL - bad log magic number or superblock dirty * error returned from logwait() * * serialization: single first open thread */ int lmLogInit(struct jfs_log * log) { int rc = 0; struct lrd lrd; struct logsuper *logsuper; struct lbuf *bpsuper; struct lbuf *bp; struct logpage *lp; int lsn = 0; jfs_info("lmLogInit: log:0x%p", log); /* initialize the group commit serialization lock */ LOGGC_LOCK_INIT(log); /* allocate/initialize the log write serialization lock */ LOG_LOCK_INIT(log); LOGSYNC_LOCK_INIT(log); INIT_LIST_HEAD(&log->synclist); INIT_LIST_HEAD(&log->cqueue); log->flush_tblk = NULL; log->count = 0; /* * initialize log i/o */ if ((rc = lbmLogInit(log))) return rc; if (!test_bit(log_INLINELOG, &log->flag)) log->l2bsize = L2LOGPSIZE; /* check for disabled journaling to disk */ if (log->no_integrity) { /* * Journal pages will still be filled. When the time comes * to actually do the I/O, the write is not done, and the * endio routine is called directly. */ bp = lbmAllocate(log , 0); log->bp = bp; bp->l_pn = bp->l_eor = 0; } else { /* * validate log superblock */ if ((rc = lbmRead(log, 1, &bpsuper))) goto errout10; logsuper = (struct logsuper *) bpsuper->l_ldata; if (logsuper->magic != cpu_to_le32(LOGMAGIC)) { jfs_warn("*** Log Format Error ! ***"); rc = -EINVAL; goto errout20; } /* logredo() should have been run successfully. */ if (logsuper->state != cpu_to_le32(LOGREDONE)) { jfs_warn("*** Log Is Dirty ! 
***"); rc = -EINVAL; goto errout20; } /* initialize log from log superblock */ if (test_bit(log_INLINELOG,&log->flag)) { if (log->size != le32_to_cpu(logsuper->size)) { rc = -EINVAL; goto errout20; } jfs_info("lmLogInit: inline log:0x%p base:0x%Lx " "size:0x%x", log, (unsigned long long) log->base, log->size); } else { if (memcmp(logsuper->uuid, log->uuid, 16)) { jfs_warn("wrong uuid on JFS log device"); goto errout20; } log->size = le32_to_cpu(logsuper->size); log->l2bsize = le32_to_cpu(logsuper->l2bsize); jfs_info("lmLogInit: external log:0x%p base:0x%Lx " "size:0x%x", log, (unsigned long long) log->base, log->size); } log->page = le32_to_cpu(logsuper->end) / LOGPSIZE; log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page); /* * initialize for log append write mode */ /* establish current/end-of-log page/buffer */ if ((rc = lbmRead(log, log->page, &bp))) goto errout20; lp = (struct logpage *) bp->l_ldata; jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d", le32_to_cpu(logsuper->end), log->page, log->eor, le16_to_cpu(lp->h.eor)); log->bp = bp; bp->l_pn = log->page; bp->l_eor = log->eor; /* if current page is full, move on to next page */ if (log->eor >= LOGPSIZE - LOGPTLRSIZE) lmNextPage(log); /* * initialize log syncpoint */ /* * write the first SYNCPT record with syncpoint = 0 * (i.e., log redo up to HERE !); * remove current page from lbm write queue at end of pageout * (to write log superblock update), but do not release to * freelist; */ lrd.logtid = 0; lrd.backchain = 0; lrd.type = cpu_to_le16(LOG_SYNCPT); lrd.length = 0; lrd.log.syncpt.sync = 0; lsn = lmWriteRecord(log, NULL, &lrd, NULL); bp = log->bp; bp->l_ceor = bp->l_eor; lp = (struct logpage *) bp->l_ldata; lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0); if ((rc = lbmIOWait(bp, 0))) goto errout30; /* * update/write superblock */ logsuper->state = cpu_to_le32(LOGMOUNT); log->serial = le32_to_cpu(logsuper->serial) + 1; logsuper->serial = 
cpu_to_le32(log->serial); lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); if ((rc = lbmIOWait(bpsuper, lbmFREE))) goto errout30; } /* initialize logsync parameters */ log->logsize = (log->size - 2) << L2LOGPSIZE; log->lsn = lsn; log->syncpt = lsn; log->sync = log->syncpt; log->nextsync = LOGSYNC_DELTA(log->logsize); jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x", log->lsn, log->syncpt, log->sync); /* * initialize for lazy/group commit */ log->clsn = lsn; return 0; /* * unwind on error */ errout30: /* release log page */ log->wqueue = NULL; bp->l_wqnext = NULL; lbmFree(bp); errout20: /* release log superblock */ lbmFree(bpsuper); errout10: /* unwind lbmLogInit() */ lbmLogShutdown(log); jfs_warn("lmLogInit: exit(%d)", rc); return rc; } /* * NAME: lmLogClose() * * FUNCTION: remove file system <ipmnt> from active list of log <iplog> * and close it on last close. * * PARAMETER: sb - superblock * * RETURN: errors from subroutines * * serialization: */ int lmLogClose(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; struct block_device *bdev; int rc = 0; jfs_info("lmLogClose: log:0x%p", log); down(&jfs_log_sem); LOG_LOCK(log); list_del(&sbi->log_list); LOG_UNLOCK(log); sbi->log = NULL; /* * We need to make sure all of the "written" metapages * actually make it to disk */ sync_blockdev(sb->s_bdev); if (test_bit(log_INLINELOG, &log->flag)) { /* * in-line log in host file system */ rc = lmLogShutdown(log); kfree(log); goto out; } if (!log->no_integrity) lmLogFileSystem(log, sbi, 0); if (!list_empty(&log->sb_list)) goto out; /* * TODO: ensure that the dummy_log is in a state to allow * lbmLogShutdown to deallocate all the buffers and call * kfree against dummy_log. For now, leave dummy_log & its * buffers in memory, and resuse if another no-integrity mount * is requested. 
*/ if (log->no_integrity) goto out; /* * external log as separate logical volume */ list_del(&log->journal_list); bdev = log->bdev; rc = lmLogShutdown(log); bd_release(bdev); blkdev_put(bdev); kfree(log); out: up(&jfs_log_sem); jfs_info("lmLogClose: exit(%d)", rc); return rc; } /* * NAME: jfs_flush_journal() * * FUNCTION: initiate write of any outstanding transactions to the journal * and optionally wait until they are all written to disk * * wait == 0 flush until latest txn is committed, don't wait * wait == 1 flush until latest txn is committed, wait * wait > 1 flush until all txn's are complete, wait */ void jfs_flush_journal(struct jfs_log *log, int wait) { int i; struct tblock *target = NULL; struct jfs_sb_info *sbi; /* jfs_write_inode may call us during read-only mount */ if (!log) return; jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait); LOGGC_LOCK(log); if (!list_empty(&log->cqueue)) { /* * This ensures that we will keep writing to the journal as long * as there are unwritten commit records */ target = list_entry(log->cqueue.prev, struct tblock, cqueue); if (test_bit(log_FLUSH, &log->flag)) { /* * We're already flushing. * if flush_tblk is NULL, we are flushing everything, * so leave it that way. 
Otherwise, update it to the * latest transaction */ if (log->flush_tblk) log->flush_tblk = target; } else { /* Only flush until latest transaction is committed */ log->flush_tblk = target; set_bit(log_FLUSH, &log->flag); /* * Initiate I/O on outstanding transactions */ if (!(log->cflag & logGC_PAGEOUT)) { log->cflag |= logGC_PAGEOUT; lmGCwrite(log, 0); } } } if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) { /* Flush until all activity complete */ set_bit(log_FLUSH, &log->flag); log->flush_tblk = NULL; } if (wait && target && !(target->flag & tblkGC_COMMITTED)) { DECLARE_WAITQUEUE(__wait, current); add_wait_queue(&target->gcwait, &__wait); set_current_state(TASK_UNINTERRUPTIBLE); LOGGC_UNLOCK(log); schedule(); current->state = TASK_RUNNING; LOGGC_LOCK(log); remove_wait_queue(&target->gcwait, &__wait); } LOGGC_UNLOCK(log); if (wait < 2) return; list_for_each_entry(sbi, &log->sb_list, log_list) { filemap_fdatawrite(sbi->ipbmap->i_mapping); filemap_fdatawrite(sbi->ipimap->i_mapping); filemap_fdatawrite(sbi->direct_inode->i_mapping); } /* * If there was recent activity, we may need to wait * for the lazycommit thread to catch up */ if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { for (i = 0; i < 200; i++) { /* Too much? */ msleep(250); if (list_empty(&log->cqueue) && list_empty(&log->synclist)) break; } } assert(list_empty(&log->cqueue)); if (!list_empty(&log->synclist)) { struct logsyncblk *lp; list_for_each_entry(lp, &log->synclist, synclist) { if (lp->xflag & COMMIT_PAGE) { struct metapage *mp = (struct metapage *)lp; dump_mem("orphan metapage", lp, sizeof(struct metapage)); dump_mem("page", mp->page, sizeof(struct page)); } else dump_mem("orphan tblock", lp, sizeof(struct tblock)); } // current->state = TASK_INTERRUPTIBLE; // schedule(); } //assert(list_empty(&log->synclist)); clear_bit(log_FLUSH, &log->flag); } /* * NAME: lmLogShutdown() * * FUNCTION: log shutdown at last LogClose(). * * write log syncpt record. 
*		update super block to set redone flag to 0.
 *
 * PARAMETER:	log	- log inode
 *
 * RETURN:	0	- success
 *
 * serialization: single last close thread
 */
int lmLogShutdown(struct jfs_log * log)
{
	int rc;
	int lsn;
	struct lrd syncpt_rec;
	struct lbuf *lastbp;
	struct lbuf *bpsuper;
	struct logpage *lastpage;
	struct logsuper *logsuper;

	jfs_info("lmLogShutdown: log:0x%p", log);

	/* push out and wait for everything still pending on this log */
	jfs_flush_journal(log, 2);

	/*
	 * write the last SYNCPT record with syncpoint = 0
	 * (i.e., log redo up to HERE !)
	 */
	syncpt_rec.logtid = 0;
	syncpt_rec.backchain = 0;
	syncpt_rec.type = cpu_to_le16(LOG_SYNCPT);
	syncpt_rec.length = 0;
	syncpt_rec.log.syncpt.sync = 0;
	lsn = lmWriteRecord(log, NULL, &syncpt_rec, NULL);

	/* finalize the current log page and write it synchronously */
	lastbp = log->bp;
	lastpage = (struct logpage *) lastbp->l_ldata;
	lastpage->t.eor = cpu_to_le16(lastbp->l_eor);
	lastpage->h.eor = lastpage->t.eor;
	lbmWrite(log, lastbp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
	lbmIOWait(lastbp, lbmFREE);
	log->bp = NULL;

	/*
	 * synchronous update log superblock
	 * mark log state as shutdown cleanly
	 * (i.e., Log does not need to be replayed).
	 */
	rc = lbmRead(log, 1, &bpsuper);
	if (rc == 0) {
		logsuper = (struct logsuper *) bpsuper->l_ldata;
		logsuper->state = cpu_to_le32(LOGREDONE);
		logsuper->end = cpu_to_le32(lsn);
		lbmDirectWrite(log, bpsuper,
			       lbmWRITE | lbmRELEASE | lbmSYNC);
		rc = lbmIOWait(bpsuper, lbmFREE);

		jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
			 lsn, log->page, log->eor);
	}

	/*
	 * shutdown per log i/o
	 */
	lbmLogShutdown(log);

	if (rc)
		jfs_warn("lmLogShutdown: exit(%d)", rc);

	return rc;
}

/*
 * NAME:	lmLogFileSystem()
 *
 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
 *	file system into/from log active file system list.
 *
 * PARAMETER:	log	- log whose superblock holds the active list.
 *		sbi	- file system to add or remove; matched by sbi->uuid.
 *		activate - insert/remove file system from active list.
 *
 * RETURN:	0	- success
 *		-EMFILE	- no free slot in the active list (activate)
 *		-EIO	- uuid not found in the active list (deactivate)
 *		errors returned by lbmRead()/lbmIOWait().
*/
static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
			   int activate)
{
	struct lbuf *bpsuper;
	struct logsuper *logsuper;
	char *uuid = sbi->uuid;
	int slot;
	int rc = 0;

	/*
	 * insert/remove file system device to log active file system list.
	 */
	rc = lbmRead(log, 1, &bpsuper);
	if (rc)
		return rc;

	logsuper = (struct logsuper *) bpsuper->l_ldata;

	if (activate) {
		/* claim the first unused (all-zero uuid) slot */
		for (slot = 0; slot < MAX_ACTIVE; slot++) {
			if (memcmp(logsuper->active[slot].uuid, NULL_UUID, 16))
				continue;
			memcpy(logsuper->active[slot].uuid, uuid, 16);
			sbi->aggregate = slot;
			break;
		}
		if (slot == MAX_ACTIVE) {
			jfs_warn("Too many file systems sharing journal!");
			lbmFree(bpsuper);
			return -EMFILE;	/* Is there a better rc? */
		}
	} else {
		/* clear the slot that carries this file system's uuid */
		for (slot = 0; slot < MAX_ACTIVE; slot++) {
			if (memcmp(logsuper->active[slot].uuid, uuid, 16))
				continue;
			memcpy(logsuper->active[slot].uuid, NULL_UUID, 16);
			break;
		}
		if (slot == MAX_ACTIVE) {
			jfs_warn("Somebody stomped on the journal!");
			lbmFree(bpsuper);
			return -EIO;
		}
	}

	/*
	 * synchronous write log superblock:
	 *
	 * write sidestream bypassing write queue:
	 * at file system mount, log super block is updated for
	 * activation of the file system before any log record
	 * (MOUNT record) of the file system, and at file system
	 * unmount, all meta data for the file system has been
	 * flushed before log super block is updated for deactivation
	 * of the file system.
	 */
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);

	return rc;
}

/*
 *		log buffer manager (lbm)
 *		------------------------
 *
 * special purpose buffer manager supporting log i/o requirements.
 *
 * per log write queue:
 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
 * a circular singly-linked list
 * (log->wqueue points to the tail, and buffers are linked via
 * bp->l_wqnext field), and
 * maintains log page in pageout or waiting for pageout in serial pageout.
*/ /* * lbmLogInit() * * initialize per log I/O setup at lmLogInit() */ static int lbmLogInit(struct jfs_log * log) { /* log inode */ int i; struct lbuf *lbuf; jfs_info("lbmLogInit: log:0x%p", log); /* initialize current buffer cursor */ log->bp = NULL; /* initialize log device write queue */ log->wqueue = NULL; /* * Each log has its own buffer pages allocated to it. These are * not managed by the page cache. This ensures that a transaction * writing to the log does not block trying to allocate a page from * the page cache (for the log). This would be bad, since page * allocation waits on the kswapd thread that may be committing inodes * which would cause log activity. Was that clear? I'm trying to * avoid deadlock here. */ init_waitqueue_head(&log->free_wait); log->lbuf_free = NULL; for (i = 0; i < LOGPAGES;) { char *buffer; uint offset; struct page *page; buffer = (char *) get_zeroed_page(GFP_KERNEL); if (buffer == NULL) goto error; page = virt_to_page(buffer); for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) { lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); if (lbuf == NULL) { if (offset == 0) free_page((unsigned long) buffer); goto error; } if (offset) /* we already have one reference */ get_page(page); lbuf->l_offset = offset; lbuf->l_ldata = buffer + offset; lbuf->l_page = page; lbuf->l_log = log; init_waitqueue_head(&lbuf->l_ioevent); lbuf->l_freelist = log->lbuf_free; log->lbuf_free = lbuf; i++; } } return (0); error: lbmLogShutdown(log); return -ENOMEM; } /* * lbmLogShutdown() * * finalize per log I/O setup at lmLogShutdown() */ static void lbmLogShutdown(struct jfs_log * log) { struct lbuf *lbuf; jfs_info("lbmLogShutdown: log:0x%p", log); lbuf = log->lbuf_free; while (lbuf) { struct lbuf *next = lbuf->l_freelist; __free_page(lbuf->l_page); kfree(lbuf); lbuf = next; } } /* * lbmAllocate() * * allocate an empty log buffer */ static struct lbuf *lbmAllocate(struct jfs_log * log, int pn) { struct lbuf *bp; unsigned long flags; /* * recycle from log 
buffer freelist if any */ LCACHE_LOCK(flags); LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags); log->lbuf_free = bp->l_freelist; LCACHE_UNLOCK(flags); bp->l_flag = 0; bp->l_wqnext = NULL; bp->l_freelist = NULL; bp->l_pn = pn; bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize)); bp->l_ceor = 0; return bp; } /* * lbmFree() * * release a log buffer to freelist */ static void lbmFree(struct lbuf * bp) { unsigned long flags; LCACHE_LOCK(flags); lbmfree(bp); LCACHE_UNLOCK(flags); } static void lbmfree(struct lbuf * bp) { struct jfs_log *log = bp->l_log; assert(bp->l_wqnext == NULL); /* * return the buffer to head of freelist */ bp->l_freelist = log->lbuf_free; log->lbuf_free = bp; wake_up(&log->free_wait); return; } /* * NAME: lbmRedrive * * FUNCTION: add a log buffer to the the log redrive list * * PARAMETER: * bp - log buffer * * NOTES: * Takes log_redrive_lock. */ static inline void lbmRedrive(struct lbuf *bp) { unsigned long flags; spin_lock_irqsave(&log_redrive_lock, flags); bp->l_redrive_next = log_redrive_list; log_redrive_list = bp; spin_unlock_irqrestore(&log_redrive_lock, flags); wake_up(&jfs_IO_thread_wait); } /* * lbmRead() */ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) { struct bio *bio; struct lbuf *bp; /* * allocate a log buffer */ *bpp = bp = lbmAllocate(log, pn); jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn); bp->l_flag |= lbmREAD; bio = bio_alloc(GFP_NOFS, 1); bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); bio->bi_bdev = log->bdev; bio->bi_io_vec[0].bv_page = bp->l_page; bio->bi_io_vec[0].bv_len = LOGPSIZE; bio->bi_io_vec[0].bv_offset = bp->l_offset; bio->bi_vcnt = 1; bio->bi_idx = 0; bio->bi_size = LOGPSIZE; bio->bi_end_io = lbmIODone; bio->bi_private = bp; submit_bio(READ_SYNC, bio); wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); return 0; } /* * lbmWrite() * * buffer at head of pageout queue stays after completion of * partial-page pageout and redriven by explicit initiation of * pageout by 
caller until full-page pageout is completed and * released. * * device driver i/o done redrives pageout of new buffer at * head of pageout queue when current buffer at head of pageout * queue is released at the completion of its full-page pageout. * * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit(). * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone() */ static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block) { struct lbuf *tail; unsigned long flags; jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn); /* map the logical block address to physical block address */ bp->l_blkno = log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); LCACHE_LOCK(flags); /* disable+lock */ /* * initialize buffer for device driver */ bp->l_flag = flag; /* * insert bp at tail of write queue associated with log * * (request is either for bp already/currently at head of queue * or new bp to be inserted at tail) */ tail = log->wqueue; /* is buffer not already on write queue ? */ if (bp->l_wqnext == NULL) { /* insert at tail of wqueue */ if (tail == NULL) { log->wqueue = bp; bp->l_wqnext = bp; } else { log->wqueue = bp; bp->l_wqnext = tail->l_wqnext; tail->l_wqnext = bp; } tail = bp; } /* is buffer at head of wqueue and for write ? 
*/ if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) { LCACHE_UNLOCK(flags); /* unlock+enable */ return; } LCACHE_UNLOCK(flags); /* unlock+enable */ if (cant_block) lbmRedrive(bp); else if (flag & lbmSYNC) lbmStartIO(bp); else { LOGGC_UNLOCK(log); lbmStartIO(bp); LOGGC_LOCK(log); } } /* * lbmDirectWrite() * * initiate pageout bypassing write queue for sidestream * (e.g., log superblock) write; */ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) { jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn); /* * initialize buffer for device driver */ bp->l_flag = flag | lbmDIRECT; /* map the logical block address to physical block address */ bp->l_blkno = log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); /* * initiate pageout of the page */ lbmStartIO(bp); } /* * NAME: lbmStartIO() * * FUNCTION: Interface to DD strategy routine * * RETURN: none * * serialization: LCACHE_LOCK() is NOT held during log i/o; */ static void lbmStartIO(struct lbuf * bp) { struct bio *bio; struct jfs_log *log = bp->l_log; jfs_info("lbmStartIO\n"); bio = bio_alloc(GFP_NOFS, 1); bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); bio->bi_bdev = log->bdev; bio->bi_io_vec[0].bv_page = bp->l_page; bio->bi_io_vec[0].bv_len = LOGPSIZE; bio->bi_io_vec[0].bv_offset = bp->l_offset; bio->bi_vcnt = 1; bio->bi_idx = 0; bio->bi_size = LOGPSIZE; bio->bi_end_io = lbmIODone; bio->bi_private = bp; /* check if journaling to disk has been disabled */ if (log->no_integrity) { bio->bi_size = 0; lbmIODone(bio, 0, 0); } else { submit_bio(WRITE_SYNC, bio); INCREMENT(lmStat.submitted); } } /* * lbmIOWait() */ static int lbmIOWait(struct lbuf * bp, int flag) { unsigned long flags; int rc = 0; jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); LCACHE_LOCK(flags); /* disable+lock */ LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags); rc = (bp->l_flag & lbmERROR) ? 
-EIO : 0; if (flag & lbmFREE) lbmfree(bp); LCACHE_UNLOCK(flags); /* unlock+enable */ jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); return rc; } /* * lbmIODone() * * executed at INTIODONE level */ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) { struct lbuf *bp = bio->bi_private; struct lbuf *nextbp, *tail; struct jfs_log *log; unsigned long flags; if (bio->bi_size) return 1; /* * get back jfs buffer bound to the i/o buffer */ jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag); LCACHE_LOCK(flags); /* disable+lock */ bp->l_flag |= lbmDONE; if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { bp->l_flag |= lbmERROR; jfs_err("lbmIODone: I/O error in JFS log"); } bio_put(bio); /* * pagein completion */ if (bp->l_flag & lbmREAD) { bp->l_flag &= ~lbmREAD; LCACHE_UNLOCK(flags); /* unlock+enable */ /* wakeup I/O initiator */ LCACHE_WAKEUP(&bp->l_ioevent); return 0; } /* * pageout completion * * the bp at the head of write queue has completed pageout. * * if single-commit/full-page pageout, remove the current buffer * from head of pageout queue, and redrive pageout with * the new buffer at head of pageout queue; * otherwise, the partial-page pageout buffer stays at * the head of pageout queue to be redriven for pageout * by lmGroupCommit() until full-page pageout is completed. 
*/ bp->l_flag &= ~lbmWRITE; INCREMENT(lmStat.pagedone); /* update committed lsn */ log = bp->l_log; log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor; if (bp->l_flag & lbmDIRECT) { LCACHE_WAKEUP(&bp->l_ioevent); LCACHE_UNLOCK(flags); return 0; } tail = log->wqueue; /* single element queue */ if (bp == tail) { /* remove head buffer of full-page pageout * from log device write queue */ if (bp->l_flag & lbmRELEASE) { log->wqueue = NULL; bp->l_wqnext = NULL; } } /* multi element queue */ else { /* remove head buffer of full-page pageout * from log device write queue */ if (bp->l_flag & lbmRELEASE) { nextbp = tail->l_wqnext = bp->l_wqnext; bp->l_wqnext = NULL; /* * redrive pageout of next page at head of write queue: * redrive next page without any bound tblk * (i.e., page w/o any COMMIT records), or * first page of new group commit which has been * queued after current page (subsequent pageout * is performed synchronously, except page without * any COMMITs) by lmGroupCommit() as indicated * by lbmWRITE flag; */ if (nextbp->l_flag & lbmWRITE) { /* * We can't do the I/O at interrupt time. 
* The jfsIO thread can do it */ lbmRedrive(nextbp); } } } /* * synchronous pageout: * * buffer has not necessarily been removed from write queue * (e.g., synchronous write of partial-page with COMMIT): * leave buffer for i/o initiator to dispose */ if (bp->l_flag & lbmSYNC) { LCACHE_UNLOCK(flags); /* unlock+enable */ /* wakeup I/O initiator */ LCACHE_WAKEUP(&bp->l_ioevent); } /* * Group Commit pageout: */ else if (bp->l_flag & lbmGC) { LCACHE_UNLOCK(flags); lmPostGC(bp); } /* * asynchronous pageout: * * buffer must have been removed from write queue: * insert buffer at head of freelist where it can be recycled */ else { assert(bp->l_flag & lbmRELEASE); assert(bp->l_flag & lbmFREE); lbmfree(bp); LCACHE_UNLOCK(flags); /* unlock+enable */ } return 0; } int jfsIOWait(void *arg) { struct lbuf *bp; daemonize("jfsIO"); complete(&jfsIOwait); do { DECLARE_WAITQUEUE(wq, current); spin_lock_irq(&log_redrive_lock); while ((bp = log_redrive_list) != 0) { log_redrive_list = bp->l_redrive_next; bp->l_redrive_next = NULL; spin_unlock_irq(&log_redrive_lock); lbmStartIO(bp); spin_lock_irq(&log_redrive_lock); } if (current->flags & PF_FREEZE) { spin_unlock_irq(&log_redrive_lock); refrigerator(PF_FREEZE); } else { add_wait_queue(&jfs_IO_thread_wait, &wq); set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&log_redrive_lock); schedule(); current->state = TASK_RUNNING; remove_wait_queue(&jfs_IO_thread_wait, &wq); } } while (!jfs_stop_threads); jfs_info("jfsIOWait being killed!"); complete_and_exit(&jfsIOwait, 0); } /* * NAME: lmLogFormat()/jfs_logform() * * FUNCTION: format file system log * * PARAMETERS: * log - volume log * logAddress - start address of log space in FS block * logSize - length of log space in FS block; * * RETURN: 0 - success * -EIO - i/o error * * XXX: We're synchronously writing one page at a time. This needs to * be improved by writing multiple pages at once. 
*/
int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
{
	int rc = -EIO;
	int npages = 0;
	int lspn;		/* log sequence page number */
	struct jfs_sb_info *sbi;
	struct lbuf *bp;
	struct logsuper *logsuper;
	struct logpage *lp;
	struct lrd *syncpt_rec;

	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
		 (long long)logAddress, logSize);

	/* geometry comes from the first file system bound to the log */
	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);

	/* allocate a log buffer */
	bp = lbmAllocate(log, 1);

	npages = logSize >> sbi->l2nbperpage;

	/*
	 *	log space:
	 *
	 * page 0 - reserved;
	 * page 1 - log superblock;
	 * page 2 - log data page: A SYNC log record is written
	 *	    into this page at logform time;
	 * pages 3-N - log data page: set to empty log data pages;
	 */
	/*
	 *	init log superblock: log page 1
	 */
	logsuper = (struct logsuper *) bp->l_ldata;

	logsuper->magic = cpu_to_le32(LOGMAGIC);
	logsuper->version = cpu_to_le32(LOGVERSION);
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
	logsuper->size = cpu_to_le32(npages);
	logsuper->bsize = cpu_to_le32(sbi->bsize);
	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);

	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	bp->l_blkno = logAddress + sbi->nbperpage;
	lbmStartIO(bp);
	rc = lbmIOWait(bp, 0);
	if (rc)
		goto exit;

	/*
	 *	init pages 2 to npages-1 as log data pages:
	 *
	 * log page sequence number (lpsn) initialization:
	 *
	 * pn:   0     1     2     3                 n-1
	 *       +-----+-----+=====+=====+===.....===+=====+
	 * lspn:             N-1   0     1           N-2
	 *                   <--- N page circular file ---->
	 *
	 * the N (= npages-2) data pages of the log is maintained as
	 * a circular file for the log records;
	 * lpsn grows by 1 monotonically as each log page is written
	 * to the circular file of the log;
	 * and setLogpage() will not reset the page number even if
	 * the eor is equal to LOGPHDRSIZE. In order for binary search
	 * still work in find log end process, we have to simulate the
	 * log wrap situation at the log format time.
	 * The 1st log page written will have the highest lpsn. Then
	 * the succeeding log pages will have ascending order of
	 * the lspn starting from 0, ... (N-2)
	 */
	/* the same buffer is reused for every data page written below */
	lp = (struct logpage *) bp->l_ldata;

	/*
	 * initialize 1st log page to be written: lpsn = N - 1,
	 * write a SYNCPT log record is written to this page
	 */
	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);

	syncpt_rec = (struct lrd *) &lp->data;
	syncpt_rec->logtid = 0;
	syncpt_rec->backchain = 0;
	syncpt_rec->type = cpu_to_le16(LOG_SYNCPT);
	syncpt_rec->length = 0;
	syncpt_rec->log.syncpt.sync = 0;

	bp->l_blkno += sbi->nbperpage;
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	lbmStartIO(bp);
	rc = lbmIOWait(bp, 0);
	if (rc)
		goto exit;

	/*
	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
	 */
	for (lspn = 0; lspn < npages - 3; lspn++) {
		lp->h.page = lp->t.page = cpu_to_le32(lspn);
		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

		bp->l_blkno += sbi->nbperpage;
		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
		lbmStartIO(bp);
		rc = lbmIOWait(bp, 0);
		if (rc)
			goto exit;
	}

	rc = 0;
exit:
	/*
	 *	finalize log
	 */
	/* release the buffer */
	lbmFree(bp);

	return rc;
}

#ifdef CONFIG_JFS_STATISTICS
/*
 *	jfs_lmstats_read()
 *
 * format the lmStat counters into <buffer> and return the number of
 * bytes available from <offset>, capped at <length>; *eof is set when
 * the remainder fits within <length>.
 */
int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
		      int *eof, void *data)
{
	int len;

	len = sprintf(buffer,
		       "JFS Logmgr stats\n"
		       "================\n"
		       "commits = %d\n"
		       "writes submitted = %d\n"
		       "writes completed = %d\n"
		       "full pages submitted = %d\n"
		       "partial pages submitted = %d\n",
		       lmStat.commit,
		       lmStat.submitted,
		       lmStat.pagedone,
		       lmStat.full_page,
		       lmStat.partial_page);

	/* hand back only the part that starts at the requested offset */
	*start = buffer + offset;
	len -= offset;

	if (len <= length)
		*eof = 1;
	else
		len = length;

	if (len < 0)
		len = 0;

	return len;
}
#endif /* CONFIG_JFS_STATISTICS */