summaryrefslogblamecommitdiffstats
path: root/drivers/hwtracing/intel_th/msu.c
blob: dbbe31df74df17a4d55a02a30acb643937f0fd5a (plain) (tree)
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052




























                                                                            


                           





























































































                                                                        





































































































































                                                                             














                                                                   



                            
                                                     

                                      





                                                                   
                                    
                               
                                      





































































































































                                                                                
                                                                                












































                                                                










                                                                             






                                                                        

                                                                       




                                         

                                             
















                                                                   
                      

                                                
 

                                  


                                                        



                                                                    








                                                                         
                                            





                                        
                                             






































                                                                           
                         



                                                          
                                    
 


                                          
 



                                             
 
                   





                                                                  





                                             






















































































































































































































































































































































































































































                                                                                

                                     


















                                                                             
                                                                    











































                                                                         





                                                              
                               
 
                        
                               
 














































                                                                          
                                  














                                                                 
                                               
 
                                                                






                                                         
                                                          





















































                                                                           
                                      








                                             








































































































































                                                                              




                                      































































                                                                          

                               













                                                          






                                                              











                                                                 







                                                     
                                            












                                                                         
/*
 * Intel(R) Trace Hub Memory Storage Unit
 *
 * Copyright (C) 2014-2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/uaccess.h>
#include <linux/sizes.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/dma-mapping.h>

#ifdef CONFIG_X86
#include <asm/set_memory.h>
#endif

#include "intel_th.h"
#include "msu.h"

#define msc_dev(x) (&(x)->thdev->dev)

/**
 * struct msc_block - multiblock mode block descriptor
 * @bdesc:	pointer to hardware descriptor (beginning of the block)
 * @addr:	physical address of the block
 */
struct msc_block {
	struct msc_block_desc	*bdesc;
	dma_addr_t		addr;
};

/**
 * struct msc_window - multiblock mode window descriptor
 * @entry:	window list linkage (msc::win_list)
 * @pgoff:	page offset into the buffer that this window starts at
 * @nr_blocks:	number of blocks (pages) in this window
 * @block:	array of block descriptors
 */
struct msc_window {
	struct list_head	entry;
	unsigned long		pgoff;
	unsigned int		nr_blocks;
	struct msc		*msc;
	struct msc_block	block[0];
};

/**
 * struct msc_iter - iterator for msc buffer
 * @entry:		msc::iter_list linkage
 * @msc:		pointer to the MSC device
 * @start_win:		oldest window
 * @win:		current window
 * @offset:		current logical offset into the buffer
 * @start_block:	oldest block in the window
 * @block:		block number in the window
 * @block_off:		offset into current block
 * @wrap_count:		block wrapping handling
 * @eof:		end of buffer reached
 */
struct msc_iter {
	struct list_head	entry;
	struct msc		*msc;
	struct msc_window	*start_win;
	struct msc_window	*win;
	unsigned long		offset;
	int			start_block;
	int			block;
	unsigned int		block_off;
	unsigned int		wrap_count;
	unsigned int		eof;
};

/**
 * struct msc - MSC device representation
 * @reg_base:		register window base address
 * @thdev:		intel_th_device pointer
 * @win_list:		list of windows in multiblock mode
 * @nr_pages:		total number of pages allocated for this buffer
 * @single_sz:		amount of data in single mode
 * @single_wrap:	single mode wrap occurred
 * @base:		buffer's base pointer
 * @base_addr:		buffer's base address
 * @user_count:		number of users of the buffer
 * @mmap_count:		number of mappings
 * @buf_mutex:		mutex to serialize access to buffer-related bits

 * @enabled:		MSC is enabled
 * @wrap:		wrapping is enabled
 * @mode:		MSC operating mode
 * @burst_len:		write burst length
 * @index:		number of this MSC in the MSU
 */
struct msc {
	void __iomem		*reg_base;
	struct intel_th_device	*thdev;

	struct list_head	win_list;
	unsigned long		nr_pages;
	unsigned long		single_sz;
	unsigned int		single_wrap : 1;
	void			*base;
	dma_addr_t		base_addr;

	/* <0: no buffer, 0: no users, >0: active users */
	atomic_t		user_count;

	atomic_t		mmap_count;
	struct mutex		buf_mutex;

	struct list_head	iter_list;

	/* config */
	unsigned int		enabled : 1,
				wrap	: 1;
	unsigned int		mode;
	unsigned int		burst_len;
	unsigned int		index;
};

static inline bool msc_block_is_empty(struct msc_block_desc *bdesc)
{
	/* header hasn't been written */
	if (!bdesc->valid_dw)
		return true;

	/* valid_dw includes the header */
	if (!msc_data_sz(bdesc))
		return true;

	return false;
}

/**
 * msc_oldest_window() - locate the window with oldest data
 * @msc:	MSC device
 *
 * This should only be used in multiblock mode. Caller should hold the
 * msc::user_count reference.
 *
 * Return:	the oldest window with valid data
 */
static struct msc_window *msc_oldest_window(struct msc *msc)
{
	struct msc_window *win;
	u32 reg = ioread32(msc->reg_base + REG_MSU_MSC0NWSA);
	unsigned long win_addr = (unsigned long)reg << PAGE_SHIFT;
	unsigned int found = 0;

	if (list_empty(&msc->win_list))
		return NULL;

	/*
	 * we might need a radix tree for this, depending on how
	 * many windows a typical user would allocate; ideally it's
	 * something like 2, in which case we're good
	 */
	list_for_each_entry(win, &msc->win_list, entry) {
		if (win->block[0].addr == win_addr)
			found++;

		/* skip the empty ones */
		if (msc_block_is_empty(win->block[0].bdesc))
			continue;

		if (found)
			return win;
	}

	return list_entry(msc->win_list.next, struct msc_window, entry);
}

/**
 * msc_win_oldest_block() - locate the oldest block in a given window
 * @win:	window to look at
 *
 * Return:	index of the block with the oldest data
 */
static unsigned int msc_win_oldest_block(struct msc_window *win)
{
	unsigned int blk;
	struct msc_block_desc *bdesc = win->block[0].bdesc;

	/* without wrapping, first block is the oldest */
	if (!msc_block_wrapped(bdesc))
		return 0;

	/*
	 * with wrapping, last written block contains both the newest and the
	 * oldest data for this window.
	 */
	for (blk = 0; blk < win->nr_blocks; blk++) {
		bdesc = win->block[blk].bdesc;

		if (msc_block_last_written(bdesc))
			return blk;
	}

	return 0;
}

/**
 * msc_is_last_win() - check if a window is the last one for a given MSC
 * @win:	window
 * Return:	true if @win is the last window in MSC's multiblock buffer
 */
static inline bool msc_is_last_win(struct msc_window *win)
{
	return win->entry.next == &win->msc->win_list;
}

/**
 * msc_next_window() - return next window in the multiblock buffer
 * @win:	current window
 *
 * Return:	window following the current one
 */
static struct msc_window *msc_next_window(struct msc_window *win)
{
	if (msc_is_last_win(win))
		return list_entry(win->msc->win_list.next, struct msc_window,
				  entry);

	return list_entry(win->entry.next, struct msc_window, entry);
}

static struct msc_block_desc *msc_iter_bdesc(struct msc_iter *iter)
{
	return iter->win->block[iter->block].bdesc;
}

static void msc_iter_init(struct msc_iter *iter)
{
	memset(iter, 0, sizeof(*iter));
	iter->start_block = -1;
	iter->block = -1;
}

static struct msc_iter *msc_iter_install(struct msc *msc)
{
	struct msc_iter *iter;

	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return ERR_PTR(-ENOMEM);

	mutex_lock(&msc->buf_mutex);

	/*
	 * Reading and tracing are mutually exclusive; if msc is
	 * enabled, open() will fail; otherwise existing readers
	 * will prevent enabling the msc and the rest of fops don't
	 * need to worry about it.
	 */
	if (msc->enabled) {
		kfree(iter);
		iter = ERR_PTR(-EBUSY);
		goto unlock;
	}

	msc_iter_init(iter);
	iter->msc = msc;

	list_add_tail(&iter->entry, &msc->iter_list);
unlock:
	mutex_unlock(&msc->buf_mutex);

	return iter;
}

static void msc_iter_remove(struct msc_iter *iter, struct msc *msc)
{
	mutex_lock(&msc->buf_mutex);
	list_del(&iter->entry);
	mutex_unlock(&msc->buf_mutex);

	kfree(iter);
}

static void msc_iter_block_start(struct msc_iter *iter)
{
	if (iter->start_block != -1)
		return;

	iter->start_block = msc_win_oldest_block(iter->win);
	iter->block = iter->start_block;
	iter->wrap_count = 0;

	/*
	 * start with the block with oldest data; if data has wrapped
	 * in this window, it should be in this block
	 */
	if (msc_block_wrapped(msc_iter_bdesc(iter)))
		iter->wrap_count = 2;

}

static int msc_iter_win_start(struct msc_iter *iter, struct msc *msc)
{
	/* already started, nothing to do */
	if (iter->start_win)
		return 0;

	iter->start_win = msc_oldest_window(msc);
	if (!iter->start_win)
		return -EINVAL;

	iter->win = iter->start_win;
	iter->start_block = -1;

	msc_iter_block_start(iter);

	return 0;
}

static int msc_iter_win_advance(struct msc_iter *iter)
{
	iter->win = msc_next_window(iter->win);
	iter->start_block = -1;

	if (iter->win == iter->start_win) {
		iter->eof++;
		return 1;
	}

	msc_iter_block_start(iter);

	return 0;
}

static int msc_iter_block_advance(struct msc_iter *iter)
{
	iter->block_off = 0;

	/* wrapping */
	if (iter->wrap_count && iter->block == iter->start_block) {
		iter->wrap_count--;
		if (!iter->wrap_count)
			/* copied newest data from the wrapped block */
			return msc_iter_win_advance(iter);
	}

	/* no wrapping, check for last written block */
	if (!iter->wrap_count && msc_block_last_written(msc_iter_bdesc(iter)))
		/* copied newest data for the window */
		return msc_iter_win_advance(iter);

	/* block advance */
	if (++iter->block == iter->win->nr_blocks)
		iter->block = 0;

	/* no wrapping, sanity check in case there is no last written block */
	if (!iter->wrap_count && iter->block == iter->start_block)
		return msc_iter_win_advance(iter);

	return 0;
}

/**
 * msc_buffer_iterate() - go through multiblock buffer's data
 * @iter:	iterator structure
 * @size:	amount of data to scan
 * @data:	callback's private data
 * @fn:		iterator callback
 *
 * This will start at the window which will be written to next (containing
 * the oldest data) and work its way to the current window, calling @fn
 * for each chunk of data as it goes.
 *
 * Caller should have msc::user_count reference to make sure the buffer
 * doesn't disappear from under us.
 *
 * Return:	amount of data actually scanned.
 */
static ssize_t
msc_buffer_iterate(struct msc_iter *iter, size_t size, void *data,
		   unsigned long (*fn)(void *, void *, size_t))
{
	struct msc *msc = iter->msc;
	size_t len = size;
	unsigned int advance;

	if (iter->eof)
		return 0;

	/* start with the oldest window */
	if (msc_iter_win_start(iter, msc))
		return 0;

	do {
		unsigned long data_bytes = msc_data_sz(msc_iter_bdesc(iter));
		void *src = (void *)msc_iter_bdesc(iter) + MSC_BDESC;
		size_t tocopy = data_bytes, copied = 0;
		size_t remaining = 0;

		advance = 1;

		/*
		 * If block wrapping happened, we need to visit the last block
		 * twice, because it contains both the oldest and the newest
		 * data in this window.
		 *
		 * First time (wrap_count==2), in the very beginning, to collect
		 * the oldest data, which is in the range
		 * (data_bytes..DATA_IN_PAGE).
		 *
		 * Second time (wrap_count==1), it's just like any other block,
		 * containing data in the range of [MSC_BDESC..data_bytes].
		 */
		if (iter->block == iter->start_block && iter->wrap_count == 2) {
			tocopy = DATA_IN_PAGE - data_bytes;
			src += data_bytes;
		}

		if (!tocopy)
			goto next_block;

		tocopy -= iter->block_off;
		src += iter->block_off;

		if (len < tocopy) {
			tocopy = len;
			advance = 0;
		}

		remaining = fn(data, src, tocopy);

		if (remaining)
			advance = 0;

		copied = tocopy - remaining;
		len -= copied;
		iter->block_off += copied;
		iter->offset += copied;

		if (!advance)
			break;

next_block:
		if (msc_iter_block_advance(iter))
			break;

	} while (len);

	return size - len;
}

/**
 * msc_buffer_clear_hw_header() - clear hw header for multiblock
 * @msc:	MSC device
 */
static void msc_buffer_clear_hw_header(struct msc *msc)
{
	struct msc_window *win;

	list_for_each_entry(win, &msc->win_list, entry) {
		unsigned int blk;
		size_t hw_sz = sizeof(struct msc_block_desc) -
			offsetof(struct msc_block_desc, hw_tag);

		for (blk = 0; blk < win->nr_blocks; blk++) {
			struct msc_block_desc *bdesc = win->block[blk].bdesc;

			memset(&bdesc->hw_tag, 0, hw_sz);
		}
	}
}

/**
 * msc_configure() - set up MSC hardware
 * @msc:	the MSC device to configure
 *
 * Program storage mode, wrapping, burst length and trace buffer address
 * into a given MSC. Then, enable tracing and set msc::enabled.
 * The latter is serialized on msc::buf_mutex, so make sure to hold it.
 */
static int msc_configure(struct msc *msc)
{
	u32 reg;

	lockdep_assert_held(&msc->buf_mutex);

	if (msc->mode > MSC_MODE_MULTI)
		return -ENOTSUPP;

	if (msc->mode == MSC_MODE_MULTI)
		msc_buffer_clear_hw_header(msc);

	reg = msc->base_addr >> PAGE_SHIFT;
	iowrite32(reg, msc->reg_base + REG_MSU_MSC0BAR);

	if (msc->mode == MSC_MODE_SINGLE) {
		reg = msc->nr_pages;
		iowrite32(reg, msc->reg_base + REG_MSU_MSC0SIZE);
	}

	reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL);
	reg &= ~(MSC_MODE | MSC_WRAPEN | MSC_EN | MSC_RD_HDR_OVRD);

	reg |= MSC_EN;
	reg |= msc->mode << __ffs(MSC_MODE);
	reg |= msc->burst_len << __ffs(MSC_LEN);

	if (msc->wrap)
		reg |= MSC_WRAPEN;

	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);

	msc->thdev->output.multiblock = msc->mode == MSC_MODE_MULTI;
	intel_th_trace_enable(msc->thdev);
	msc->enabled = 1;


	return 0;
}

/**
 * msc_disable() - disable MSC hardware
 * @msc:	MSC device to disable
 *
 * If @msc is enabled, disable tracing on the switch and then disable MSC
 * storage. Caller must hold msc::buf_mutex.
 */
static void msc_disable(struct msc *msc)
{
	unsigned long count;
	u32 reg;

	lockdep_assert_held(&msc->buf_mutex);

	intel_th_trace_disable(msc->thdev);

	for (reg = 0, count = MSC_PLE_WAITLOOP_DEPTH;
	     count && !(reg & MSCSTS_PLE); count--) {
		reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
		cpu_relax();
	}

	if (!count)
		dev_dbg(msc_dev(msc), "timeout waiting for MSC0 PLE\n");

	if (msc->mode == MSC_MODE_SINGLE) {
		msc->single_wrap = !!(reg & MSCSTS_WRAPSTAT);

		reg = ioread32(msc->reg_base + REG_MSU_MSC0MWP);
		msc->single_sz = reg & ((msc->nr_pages << PAGE_SHIFT) - 1);
		dev_dbg(msc_dev(msc), "MSCnMWP: %08x/%08lx, wrap: %d\n",
			reg, msc->single_sz, msc->single_wrap);
	}

	reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL);
	reg &= ~MSC_EN;
	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);
	msc->enabled = 0;

	iowrite32(0, msc->reg_base + REG_MSU_MSC0BAR);
	iowrite32(0, msc->reg_base + REG_MSU_MSC0SIZE);

	dev_dbg(msc_dev(msc), "MSCnNWSA: %08x\n",
		ioread32(msc->reg_base + REG_MSU_MSC0NWSA));

	reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
	dev_dbg(msc_dev(msc), "MSCnSTS: %08x\n", reg);
}

static int intel_th_msc_activate(struct intel_th_device *thdev)
{
	struct msc *msc = dev_get_drvdata(&thdev->dev);
	int ret = -EBUSY;

	if (!atomic_inc_unless_negative(&msc->user_count))
		return -ENODEV;

	mutex_lock(&msc->buf_mutex);

	/* if there are readers, refuse */
	if (list_empty(&msc->iter_list))
		ret = msc_configure(msc);

	mutex_unlock(&msc->buf_mutex);

	if (ret)
		atomic_dec(&msc->user_count);

	return ret;
}

static void intel_th_msc_deactivate(struct intel_th_device *thdev)
{
	struct msc *msc = dev_get_drvdata(&thdev->dev);

	mutex_lock(&msc->buf_mutex);
	if (msc->enabled) {
		msc_disable(msc);
		atomic_dec(&msc->user_count);
	}
	mutex_unlock(&msc->buf_mutex);
}

/**
 * msc_buffer_contig_alloc() - allocate a contiguous buffer for SINGLE mode
 * @msc:	MSC device
 * @size:	allocation size in bytes
 *
 * This modifies msc::base, which requires msc::buf_mutex to serialize, so the
 * caller is expected to hold it.
 *
 * Return:	0 on success, -errno otherwise.
 */
static int msc_buffer_contig_alloc(struct msc *msc, unsigned long size)
{
	unsigned int order = get_order(size);
	struct page *page;

	if (!size)
		return 0;

	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
	if (!page)
		return -ENOMEM;

	split_page(page, order);
	msc->nr_pages = size >> PAGE_SHIFT;
	msc->base = page_address(page);
	msc->base_addr = page_to_phys(page);

	return 0;
}

/**
 * msc_buffer_contig_free() - free a contiguous buffer
 * @msc:	MSC configured in SINGLE mode
 */
static void msc_buffer_contig_free(struct msc *msc)
{
	unsigned long off;

	for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) {
		struct page *page = virt_to_page(msc->base + off);

		page->mapping = NULL;
		__free_page(page);
	}

	msc->nr_pages = 0;
}

/**
 * msc_buffer_contig_get_page() - find a page at a given offset
 * @msc:	MSC configured in SINGLE mode
 * @pgoff:	page offset
 *
 * Return:	page, if @pgoff is within the range, NULL otherwise.
 */
static struct page *msc_buffer_contig_get_page(struct msc *msc,
					       unsigned long pgoff)
{
	if (pgoff >= msc->nr_pages)
		return NULL;

	return virt_to_page(msc->base + (pgoff << PAGE_SHIFT));
}

/**
 * msc_buffer_win_alloc() - alloc a window for a multiblock mode
 * @msc:	MSC device
 * @nr_blocks:	number of pages in this window
 *
 * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
 * to serialize, so the caller is expected to hold it.
 *
 * Return:	0 on success, -errno otherwise.
 */
static int msc_buffer_win_alloc(struct msc *msc, unsigned int nr_blocks)
{
	struct msc_window *win;
	unsigned long size = PAGE_SIZE;
	int i, ret = -ENOMEM;

	if (!nr_blocks)
		return 0;

	win = kzalloc(offsetof(struct msc_window, block[nr_blocks]),
		      GFP_KERNEL);
	if (!win)
		return -ENOMEM;

	if (!list_empty(&msc->win_list)) {
		struct msc_window *prev = list_entry(msc->win_list.prev,
						     struct msc_window, entry);

		win->pgoff = prev->pgoff + prev->nr_blocks;
	}

	for (i = 0; i < nr_blocks; i++) {
		win->block[i].bdesc = dma_alloc_coherent(msc_dev(msc), size,
							 &win->block[i].addr,
							 GFP_KERNEL);

#ifdef CONFIG_X86
		/* Set the page as uncached */
		set_memory_uc((unsigned long)win->block[i].bdesc, 1);
#endif

		if (!win->block[i].bdesc)
			goto err_nomem;
	}

	win->msc = msc;
	win->nr_blocks = nr_blocks;

	if (list_empty(&msc->win_list)) {
		msc->base = win->block[0].bdesc;
		msc->base_addr = win->block[0].addr;
	}

	list_add_tail(&win->entry, &msc->win_list);
	msc->nr_pages += nr_blocks;

	return 0;

err_nomem:
	for (i--; i >= 0; i--) {
#ifdef CONFIG_X86
		/* Reset the page to write-back before releasing */
		set_memory_wb((unsigned long)win->block[i].bdesc, 1);
#endif
		dma_free_coherent(msc_dev(msc), size, win->block[i].bdesc,
				  win->block[i].addr);
	}
	kfree(win);

	return ret;
}

/**
 * msc_buffer_win_free() - free a window from MSC's window list
 * @msc:	MSC device
 * @win:	window to free
 *
 * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
 * to serialize, so the caller is expected to hold it.
 */
static void msc_buffer_win_free(struct msc *msc, struct msc_window *win)
{
	int i;

	msc->nr_pages -= win->nr_blocks;

	list_del(&win->entry);
	if (list_empty(&msc->win_list)) {
		msc->base = NULL;
		msc->base_addr = 0;
	}

	for (i = 0; i < win->nr_blocks; i++) {
		struct page *page = virt_to_page(win->block[i].bdesc);

		page->mapping = NULL;
#ifdef CONFIG_X86
		/* Reset the page to write-back before releasing */
		set_memory_wb((unsigned long)win->block[i].bdesc, 1);
#endif
		dma_free_coherent(msc_dev(win->msc), PAGE_SIZE,
				  win->block[i].bdesc, win->block[i].addr);
	}

	kfree(win);
}

/**
 * msc_buffer_relink() - set up block descriptors for multiblock mode
 * @msc:	MSC device
 *
 * This traverses msc::win_list, which requires msc::buf_mutex to serialize,
 * so the caller is expected to hold it.
 */
static void msc_buffer_relink(struct msc *msc)
{
	struct msc_window *win, *next_win;

	/* call with msc::mutex locked */
	list_for_each_entry(win, &msc->win_list, entry) {
		unsigned int blk;
		u32 sw_tag = 0;

		/*
		 * Last window's next_win should point to the first window
		 * and MSC_SW_TAG_LASTWIN should be set.
		 */
		if (msc_is_last_win(win)) {
			sw_tag |= MSC_SW_TAG_LASTWIN;
			next_win = list_entry(msc->win_list.next,
					      struct msc_window, entry);
		} else {
			next_win = list_entry(win->entry.next,
					      struct msc_window, entry);
		}

		for (blk = 0; blk < win->nr_blocks; blk++) {
			struct msc_block_desc *bdesc = win->block[blk].bdesc;

			memset(bdesc, 0, sizeof(*bdesc));

			bdesc->next_win = next_win->block[0].addr >> PAGE_SHIFT;

			/*
			 * Similarly to last window, last block should point
			 * to the first one.
			 */
			if (blk == win->nr_blocks - 1) {
				sw_tag |= MSC_SW_TAG_LASTBLK;
				bdesc->next_blk =
					win->block[0].addr >> PAGE_SHIFT;
			} else {
				bdesc->next_blk =
					win->block[blk + 1].addr >> PAGE_SHIFT;
			}

			bdesc->sw_tag = sw_tag;
			bdesc->block_sz = PAGE_SIZE / 64;
		}
	}

	/*
	 * Make the above writes globally visible before tracing is
	 * enabled to make sure hardware sees them coherently.
	 */
	wmb();
}

static void msc_buffer_multi_free(struct msc *msc)
{
	struct msc_window *win, *iter;

	list_for_each_entry_safe(win, iter, &msc->win_list, entry)
		msc_buffer_win_free(msc, win);
}

static int msc_buffer_multi_alloc(struct msc *msc, unsigned long *nr_pages,
				  unsigned int nr_wins)
{
	int ret, i;

	for (i = 0; i < nr_wins; i++) {
		ret = msc_buffer_win_alloc(msc, nr_pages[i]);
		if (ret) {
			msc_buffer_multi_free(msc);
			return ret;
		}
	}

	msc_buffer_relink(msc);

	return 0;
}

/**
 * msc_buffer_free() - free buffers for MSC
 * @msc:	MSC device
 *
 * Free MSC's storage buffers.
 *
 * This modifies msc::win_list and msc::base, which requires msc::buf_mutex to
 * serialize, so the caller is expected to hold it.
 */
static void msc_buffer_free(struct msc *msc)
{
	if (msc->mode == MSC_MODE_SINGLE)
		msc_buffer_contig_free(msc);
	else if (msc->mode == MSC_MODE_MULTI)
		msc_buffer_multi_free(msc);
}

/**
 * msc_buffer_alloc() - allocate a buffer for MSC
 * @msc:	MSC device
 * @size:	allocation size in bytes
 *
 * Allocate a storage buffer for MSC, depending on the msc::mode, it will be
 * either done via msc_buffer_contig_alloc() for SINGLE operation mode or
 * msc_buffer_win_alloc() for multiblock operation. The latter allocates one
 * window per invocation, so in multiblock mode this can be called multiple
 * times for the same MSC to allocate multiple windows.
 *
 * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
 * to serialize, so the caller is expected to hold it.
 *
 * Return:	0 on success, -errno otherwise.
 */
static int msc_buffer_alloc(struct msc *msc, unsigned long *nr_pages,
			    unsigned int nr_wins)
{
	int ret;

	/* -1: buffer not allocated */
	if (atomic_read(&msc->user_count) != -1)
		return -EBUSY;

	if (msc->mode == MSC_MODE_SINGLE) {
		if (nr_wins != 1)
			return -EINVAL;

		ret = msc_buffer_contig_alloc(msc, nr_pages[0] << PAGE_SHIFT);
	} else if (msc->mode == MSC_MODE_MULTI) {
		ret = msc_buffer_multi_alloc(msc, nr_pages, nr_wins);
	} else {
		ret = -ENOTSUPP;
	}

	if (!ret) {
		/* allocation should be visible before the counter goes to 0 */
		smp_mb__before_atomic();

		if (WARN_ON_ONCE(atomic_cmpxchg(&msc->user_count, -1, 0) != -1))
			return -EINVAL;
	}

	return ret;
}

/**
 * msc_buffer_unlocked_free_unless_used() - free a buffer unless it's in use
 * @msc:	MSC device
 *
 * This will free MSC buffer unless it is in use or there is no allocated
 * buffer.
 * Caller needs to hold msc::buf_mutex.
 *
 * Return:	0 on successful deallocation or if there was no buffer to
 *		deallocate, -EBUSY if there are active users.
 */
static int msc_buffer_unlocked_free_unless_used(struct msc *msc)
{
	int count, ret = 0;

	count = atomic_cmpxchg(&msc->user_count, 0, -1);

	/* > 0: buffer is allocated and has users */
	if (count > 0)
		ret = -EBUSY;
	/* 0: buffer is allocated, no users */
	else if (!count)
		msc_buffer_free(msc);
	/* < 0: no buffer, nothing to do */

	return ret;
}

/**
 * msc_buffer_free_unless_used() - free a buffer unless it's in use
 * @msc:	MSC device
 *
 * This is a locked version of msc_buffer_unlocked_free_unless_used().
 */
static int msc_buffer_free_unless_used(struct msc *msc)
{
	int ret;

	mutex_lock(&msc->buf_mutex);
	ret = msc_buffer_unlocked_free_unless_used(msc);
	mutex_unlock(&msc->buf_mutex);

	return ret;
}

/**
 * msc_buffer_get_page() - get MSC buffer page at a given offset
 * @msc:	MSC device
 * @pgoff:	page offset into the storage buffer
 *
 * This traverses msc::win_list, so holding msc::buf_mutex is expected from
 * the caller.
 *
 * Return:	page if @pgoff corresponds to a valid buffer page or NULL.
 */
static struct page *msc_buffer_get_page(struct msc *msc, unsigned long pgoff)
{
	struct msc_window *win;

	if (msc->mode == MSC_MODE_SINGLE)
		return msc_buffer_contig_get_page(msc, pgoff);

	list_for_each_entry(win, &msc->win_list, entry)
		if (pgoff >= win->pgoff && pgoff < win->pgoff + win->nr_blocks)
			goto found;

	return NULL;

found:
	pgoff -= win->pgoff;
	return virt_to_page(win->block[pgoff].bdesc);
}

/**
 * struct msc_win_to_user_struct - data for copy_to_user() callback
 * @buf:	userspace buffer to copy data to
 * @offset:	running offset
 */
struct msc_win_to_user_struct {
	char __user	*buf;
	unsigned long	offset;
};

/**
 * msc_win_to_user() - iterator for msc_buffer_iterate() to copy data to user
 * @data:	callback's private data
 * @src:	source buffer
 * @len:	amount of data to copy from the source buffer
 */
static unsigned long msc_win_to_user(void *data, void *src, size_t len)
{
	struct msc_win_to_user_struct *u = data;
	unsigned long ret;

	ret = copy_to_user(u->buf + u->offset, src, len);
	u->offset += len - ret;

	return ret;
}


/*
 * file operations' callbacks
 */

static int intel_th_msc_open(struct inode *inode, struct file *file)
{
	struct intel_th_device *thdev = file->private_data;
	struct msc *msc = dev_get_drvdata(&thdev->dev);
	struct msc_iter *iter;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	iter = msc_iter_install(msc);
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	file->private_data = iter;

	return nonseekable_open(inode, file);
}

static int intel_th_msc_release(struct inode *inode, struct file *file)
{
	struct msc_iter *iter = file->private_data;
	struct msc *msc = iter->msc;

	msc_iter_remove(iter, msc);

	return 0;
}

static ssize_t
msc_single_to_user(struct msc *msc, char __user *buf, loff_t off, size_t len)
{
	unsigned long size = msc->nr_pages << PAGE_SHIFT, rem = len;
	unsigned long start = off, tocopy = 0;

	if (msc->single_wrap) {
		start += msc->single_sz;
		if (start < size) {
			tocopy = min(rem, size - start);
			if (copy_to_user(buf, msc->base + start, tocopy))
				return -EFAULT;

			buf += tocopy;
			rem -= tocopy;
			start += tocopy;
		}

		start &= size - 1;
		if (rem) {
			tocopy = min(rem, msc->single_sz - start);
			if (copy_to_user(buf, msc->base + start, tocopy))
				return -EFAULT;

			rem -= tocopy;
		}

		return len - rem;
	}

	if (copy_to_user(buf, msc->base + start, rem))
		return -EFAULT;

	return len;
}

static ssize_t intel_th_msc_read(struct file *file, char __user *buf,
				 size_t len, loff_t *ppos)
{
	struct msc_iter *iter = file->private_data;
	struct msc *msc = iter->msc;
	size_t size;
	loff_t off = *ppos;
	ssize_t ret = 0;

	if (!atomic_inc_unless_negative(&msc->user_count))
		return 0;

	if (msc->mode == MSC_MODE_SINGLE && !msc->single_wrap)
		size = msc->single_sz;
	else
		size = msc->nr_pages << PAGE_SHIFT;

	if (!size)
		goto put_count;

	if (off >= size)
		goto put_count;

	if (off + len >= size)
		len = size - off;

	if (msc->mode == MSC_MODE_SINGLE) {
		ret = msc_single_to_user(msc, buf, off, len);
		if (ret >= 0)
			*ppos += ret;
	} else if (msc->mode == MSC_MODE_MULTI) {
		struct msc_win_to_user_struct u = {
			.buf	= buf,
			.offset	= 0,
		};

		ret = msc_buffer_iterate(iter, len, &u, msc_win_to_user);
		if (ret >= 0)
			*ppos = iter->offset;
	} else {
		ret = -ENOTSUPP;
	}

put_count:
	atomic_dec(&msc->user_count);

	return ret;
}

/*
 * vm operations callbacks (vm_ops)
 */

static void msc_mmap_open(struct vm_area_struct *vma)
{
	struct msc_iter *iter = vma->vm_file->private_data;
	struct msc *msc = iter->msc;

	atomic_inc(&msc->mmap_count);
}

static void msc_mmap_close(struct vm_area_struct *vma)
{
	struct msc_iter *iter = vma->vm_file->private_data;
	struct msc *msc = iter->msc;
	unsigned long pg;

	if (!atomic_dec_and_mutex_lock(&msc->mmap_count, &msc->buf_mutex))
		return;

	/* drop page _refcounts */
	for (pg = 0; pg < msc->nr_pages; pg++) {
		struct page *page = msc_buffer_get_page(msc, pg);

		if (WARN_ON_ONCE(!page))
			continue;

		if (page->mapping)
			page->mapping = NULL;
	}

	/* last mapping -- drop user_count */
	atomic_dec(&msc->user_count);
	mutex_unlock(&msc->buf_mutex);
}

static int msc_mmap_fault(struct vm_fault *vmf)
{
	struct msc_iter *iter = vmf->vma->vm_file->private_data;
	struct msc *msc = iter->msc;

	vmf->page = msc_buffer_get_page(msc, vmf->pgoff);
	if (!vmf->page)
		return VM_FAULT_SIGBUS;

	get_page(vmf->page);
	vmf->page->mapping = vmf->vma->vm_file->f_mapping;
	vmf->page->index = vmf->pgoff;

	return 0;
}

static const struct vm_operations_struct msc_mmap_ops = {
	.open	= msc_mmap_open,
	.close	= msc_mmap_close,
	.fault	= msc_mmap_fault,
};

static int intel_th_msc_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	struct msc_iter *iter = vma->vm_file->private_data;
	struct msc *msc = iter->msc;
	int ret = -EINVAL;

	if (!size || offset_in_page(size))
		return -EINVAL;

	if (vma->vm_pgoff)
		return -EINVAL;

	/* grab user_count once per mmap; drop in msc_mmap_close() */
	if (!atomic_inc_unless_negative(&msc->user_count))
		return -EINVAL;

	if (msc->mode != MSC_MODE_SINGLE &&
	    msc->mode != MSC_MODE_MULTI)
		goto out;

	if (size >> PAGE_SHIFT != msc->nr_pages)
		goto out;

	atomic_set(&msc->mmap_count, 1);
	ret = 0;

out:
	if (ret)
		atomic_dec(&msc->user_count);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	vma->vm_flags |= VM_DONTEXPAND | VM_DONTCOPY;
	vma->vm_ops = &msc_mmap_ops;
	return ret;
}

static const struct file_operations intel_th_msc_fops = {
	.open		= intel_th_msc_open,
	.release	= intel_th_msc_release,
	.read		= intel_th_msc_read,
	.mmap		= intel_th_msc_mmap,
	.llseek		= no_llseek,
	.owner		= THIS_MODULE,
};

static int intel_th_msc_init(struct msc *msc)
{
	atomic_set(&msc->user_count, -1);

	msc->mode = MSC_MODE_MULTI;
	mutex_init(&msc->buf_mutex);
	INIT_LIST_HEAD(&msc->win_list);
	INIT_LIST_HEAD(&msc->iter_list);

	msc->burst_len =
		(ioread32(msc->reg_base + REG_MSU_MSC0CTL) & MSC_LEN) >>
		__ffs(MSC_LEN);

	return 0;
}

static const char * const msc_mode[] = {
	[MSC_MODE_SINGLE]	= "single",
	[MSC_MODE_MULTI]	= "multi",
	[MSC_MODE_EXI]		= "ExI",
	[MSC_MODE_DEBUG]	= "debug",
};

static ssize_t
wrap_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct msc *msc = dev_get_drvdata(dev);

	return scnprintf(buf, PAGE_SIZE, "%d\n", msc->wrap);
}

static ssize_t
wrap_store(struct device *dev, struct device_attribute *attr, const char *buf,
	   size_t size)
{
	struct msc *msc = dev_get_drvdata(dev);
	unsigned long val;
	int ret;

	ret = kstrtoul(buf, 10, &val);
	if (ret)
		return ret;

	msc->wrap = !!val;

	return size;
}

static DEVICE_ATTR_RW(wrap);

static ssize_t
mode_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct msc *msc = dev_get_drvdata(dev);

	return scnprintf(buf, PAGE_SIZE, "%s\n", msc_mode[msc->mode]);
}

static ssize_t
mode_store(struct device *dev, struct device_attribute *attr, const char *buf,
	   size_t size)
{
	struct msc *msc = dev_get_drvdata(dev);
	size_t len = size;
	char *cp;
	int i, ret;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	cp = memchr(buf, '\n', len);
	if (cp)
		len = cp - buf;

	for (i = 0; i < ARRAY_SIZE(msc_mode); i++)
		if (!strncmp(msc_mode[i], buf, len))
			goto found;

	return -EINVAL;

found:
	mutex_lock(&msc->buf_mutex);
	ret = msc_buffer_unlocked_free_unless_used(msc);
	if (!ret)
		msc->mode = i;
	mutex_unlock(&msc->buf_mutex);

	return ret ? ret : size;
}

static DEVICE_ATTR_RW(mode);

static ssize_t
nr_pages_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct msc *msc = dev_get_drvdata(dev);
	struct msc_window *win;
	size_t count = 0;

	mutex_lock(&msc->buf_mutex);

	if (msc->mode == MSC_MODE_SINGLE)
		count = scnprintf(buf, PAGE_SIZE, "%ld\n", msc->nr_pages);
	else if (msc->mode == MSC_MODE_MULTI) {
		list_for_each_entry(win, &msc->win_list, entry) {
			count += scnprintf(buf + count, PAGE_SIZE - count,
					   "%d%c", win->nr_blocks,
					   msc_is_last_win(win) ? '\n' : ',');
		}
	} else {
		count = scnprintf(buf, PAGE_SIZE, "unsupported\n");
	}

	mutex_unlock(&msc->buf_mutex);

	return count;
}

static ssize_t
nr_pages_store(struct device *dev, struct device_attribute *attr,
	       const char *buf, size_t size)
{
	struct msc *msc = dev_get_drvdata(dev);
	unsigned long val, *win = NULL, *rewin;
	size_t len = size;
	const char *p = buf;
	char *end, *s;
	int ret, nr_wins = 0;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	ret = msc_buffer_free_unless_used(msc);
	if (ret)
		return ret;

	/* scan the comma-separated list of allocation sizes */
	end = memchr(buf, '\n', len);
	if (end)
		len = end - buf;

	do {
		end = memchr(p, ',', len);
		s = kstrndup(p, end ? end - p : len, GFP_KERNEL);
		if (!s) {
			ret = -ENOMEM;
			goto free_win;
		}

		ret = kstrtoul(s, 10, &val);
		kfree(s);

		if (ret || !val)
			goto free_win;

		if (nr_wins && msc->mode == MSC_MODE_SINGLE) {
			ret = -EINVAL;
			goto free_win;
		}

		nr_wins++;
		rewin = krealloc(win, sizeof(*win) * nr_wins, GFP_KERNEL);
		if (!rewin) {
			kfree(win);
			return -ENOMEM;
		}

		win = rewin;
		win[nr_wins - 1] = val;

		if (!end)
			break;

		len -= end - p;
		p = end + 1;
	} while (len);

	mutex_lock(&msc->buf_mutex);
	ret = msc_buffer_alloc(msc, win, nr_wins);
	mutex_unlock(&msc->buf_mutex);

free_win:
	kfree(win);

	return ret ? ret : size;
}

static DEVICE_ATTR_RW(nr_pages);

static struct attribute *msc_output_attrs[] = {
	&dev_attr_wrap.attr,
	&dev_attr_mode.attr,
	&dev_attr_nr_pages.attr,
	NULL,
};

static struct attribute_group msc_output_group = {
	.attrs	= msc_output_attrs,
};

static int intel_th_msc_probe(struct intel_th_device *thdev)
{
	struct device *dev = &thdev->dev;
	struct resource *res;
	struct msc *msc;
	void __iomem *base;
	int err;

	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;

	base = devm_ioremap(dev, res->start, resource_size(res));
	if (!base)
		return -ENOMEM;

	msc = devm_kzalloc(dev, sizeof(*msc), GFP_KERNEL);
	if (!msc)
		return -ENOMEM;

	msc->index = thdev->id;

	msc->thdev = thdev;
	msc->reg_base = base + msc->index * 0x100;

	err = intel_th_msc_init(msc);
	if (err)
		return err;

	dev_set_drvdata(dev, msc);

	return 0;
}

static void intel_th_msc_remove(struct intel_th_device *thdev)
{
	struct msc *msc = dev_get_drvdata(&thdev->dev);
	int ret;

	intel_th_msc_deactivate(thdev);

	/*
	 * Buffers should not be used at this point except if the
	 * output character device is still open and the parent
	 * device gets detached from its bus, which is a FIXME.
	 */
	ret = msc_buffer_free_unless_used(msc);
	WARN_ON_ONCE(ret);
}

static struct intel_th_driver intel_th_msc_driver = {
	.probe	= intel_th_msc_probe,
	.remove	= intel_th_msc_remove,
	.activate	= intel_th_msc_activate,
	.deactivate	= intel_th_msc_deactivate,
	.fops	= &intel_th_msc_fops,
	.attr_group	= &msc_output_group,
	.driver	= {
		.name	= "msc",
		.owner	= THIS_MODULE,
	},
};

module_driver(intel_th_msc_driver,
	      intel_th_driver_register,
	      intel_th_driver_unregister);

MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Intel(R) Trace Hub Memory Storage Unit driver");
MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");