summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVineet Gupta2013-09-05 11:15:51 +0200
committerVineet Gupta2013-11-06 06:11:38 +0100
commitd4599baf5c773660f32ee6bc35c1afab009a52d9 (patch)
treee7538e616f56ceade60bd63833df965afbcc50b0
parentARC: cacheflush refactor #3: Unify the {d,i}cache flush leaf helpers (diff)
downloadkernel-qcow2-linux-d4599baf5c773660f32ee6bc35c1afab009a52d9.tar.gz
kernel-qcow2-linux-d4599baf5c773660f32ee6bc35c1afab009a52d9.tar.xz
kernel-qcow2-linux-d4599baf5c773660f32ee6bc35c1afab009a52d9.zip
ARC: cacheflush optim - PTAG can be loop invariant if V-P is const
Line op needs vaddr (indexing) and paddr (tag match). For page sized flushes (V-P const), each line op will need a different index, but the tag bits wil remain constant, hence paddr can be setup once outside the loop. This improves select LMBench numbers for Aliasing dcache where we have more "preventive" cache flushing. Processor, Processes - times in microseconds - smaller is better ------------------------------------------------------------------------------ Host OS Mhz null null open slct sig sig fork exec sh call I/O stat clos TCP inst hndl proc proc proc --------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 3.11-rc7- Linux 3.11.0- 80 4.66 8.88 69.7 112. 268. 8.60 28.0 3489 13.K 27.K # Non alias ARC700 3.11-rc7- Linux 3.11.0- 80 4.64 8.51 68.6 98.5 271. 8.58 28.1 4160 15.K 32.K # Aliasing 3.11-rc7- Linux 3.11.0- 80 4.64 8.51 69.8 99.4 270. 8.73 27.5 3880 15.K 31.K # PTAG loop Inv Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
-rw-r--r--arch/arc/mm/cache_arc700.c14
1 files changed, 11 insertions, 3 deletions
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index a152f3263ac0..6b58c1de7577 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -250,6 +250,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
{
unsigned int aux_cmd, aux_tag;
int num_lines;
+ const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
if (cacheop == OP_INV_IC) {
aux_cmd = ARC_REG_IC_IVIL;
@@ -267,7 +268,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
* -@paddr will be cache-line aligned already (being page aligned)
* -@sz will be integral multiple of line size (being page sized).
*/
- if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
+ if (!full_page_op) {
sz += paddr & ~CACHE_LINE_MASK;
paddr &= CACHE_LINE_MASK;
vaddr &= CACHE_LINE_MASK;
@@ -278,19 +279,26 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
#if (CONFIG_ARC_MMU_VER <= 2)
/* MMUv2 and before: paddr contains stuffed vaddrs bits */
paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
+#else
+ /* if V-P const for loop, PTAG can be written once outside loop */
+ if (full_page_op)
+ write_aux_reg(ARC_REG_DC_PTAG, paddr);
#endif
while (num_lines-- > 0) {
#if (CONFIG_ARC_MMU_VER > 2)
/* MMUv3, cache ops require paddr seperately */
- write_aux_reg(ARC_REG_DC_PTAG, paddr);
+ if (!full_page_op) {
+ write_aux_reg(aux_tag, paddr);
+ paddr += L1_CACHE_BYTES;
+ }
write_aux_reg(aux_cmd, vaddr);
vaddr += L1_CACHE_BYTES;
#else
write_aux_reg(aux, paddr);
-#endif
paddr += L1_CACHE_BYTES;
+#endif
}
}