\documentclass[11pt,twoside]{article}\makeatletter

% Colour support: use xcolor when it is installed, otherwise the color package.
\IfFileExists{xcolor.sty}%
  {\RequirePackage{xcolor}}%
  {\RequirePackage{color}}
\usepackage{colortbl}
\usepackage{wrapfig}
\usepackage{ifxetex}
% Engine-specific input/font setup.  Both branches must define the same set of
% script-switching macros (\textJapanese, \textChinese, \textKorean) so that
% body text compiles under either engine.
\ifxetex
  \usepackage{fontspec}
  \usepackage{xunicode}
  % The characters U+20E5, U+2774 and U+2775 are made active and print a
  % backslash, an opening brace and a closing brace respectively.
  \catcode`⃥=\active \def⃥{\textbackslash}
  \catcode`❴=\active \def❴{\{}
  \catcode`❵=\active \def❵{\}}
  \def\textJapanese{\fontspec{Noto Sans CJK JP}}
  \def\textChinese{\fontspec{Noto Sans CJK SC}}
  \def\textKorean{\fontspec{Noto Sans CJK KR}}
  \setmonofont{DejaVu Sans Mono}
\else
  % Prefer the utf8x input encoding when its definition file is available.
  \IfFileExists{utf8x.def}%
   {\usepackage[utf8x]{inputenc}
      \PrerenderUnicode{–}
    }%
   {\usepackage[utf8]{inputenc}}
  \usepackage[english]{babel}
  \usepackage[T1]{fontenc}
  \usepackage{float}
  \usepackage[]{ucs}
  % Per-character output for selected code points (given in decimal).  The
  % first three mirror the active characters of the XeTeX branch.
  \uc@dclc{8421}{default}{\textbackslash }
  \uc@dclc{10100}{default}{\{}
  \uc@dclc{10101}{default}{\}}
  \uc@dclc{8491}{default}{\AA{}}
  \uc@dclc{8239}{default}{\,}
  \uc@dclc{20154}{default}{ }
  \uc@dclc{10148}{default}{>}
  % \rotatebox is only available once graphicx has been loaded (done later in
  % this preamble); the macro is expanded at use time, so that is sufficient.
  \def\textschwa{\rotatebox{-90}{e}}
  % No CJK fonts are selected here: the script switches are no-ops.
  \def\textJapanese{}
  \def\textChinese{}
  % Previously missing, which left \textKorean undefined outside XeTeX.
  \def\textKorean{}
  \IfFileExists{tipa.sty}{\usepackage{tipa}}{}
\fi
% Font switch for example text: typewriter family at \small size.
\def\exampleFont{\ttfamily\small}
% \textpi is declared as slot 25 of the OML encoding.
\DeclareTextSymbol{\textpi}{OML}{25}
\usepackage{relsize}
\RequirePackage{array}
% \@testpach: classify the next character of a column specification.
% It sets \@chclass and \@chnum from the previous class (\@lastchclass) and the
% stringified \@nextchar:
%   c / l / r      -> \@chnum 0 / 1 / 2 (class stays 0)
%   |  !  @  (  )  -> \@chclass 1 / 6 / 7 / 8 / 9
%   m / p / b      -> \@chclass 10 with \@chnum 3 / 4 / 5
%   anything else  -> \@preamerr 0
% This overrides the definition supplied by the array package loaded just
% before it.  NOTE(review): stock array.sty is believed to test `<' and `>'
% where this version tests `(' and `)' -- confirm against the installed
% array.sty.  The column types and \Panel in this preamble use `)' accordingly.
\def\@testpach{\@chclass
 \ifnum \@lastchclass=6 \@ne \@chnum \@ne \else
  \ifnum \@lastchclass=7 5 \else
   \ifnum \@lastchclass=8 \tw@ \else
    \ifnum \@lastchclass=9 \thr@@
   \else \z@
   \ifnum \@lastchclass = 10 \else
   \edef\@nextchar{\expandafter\string\@nextchar}%
   \@chnum
   \if \@nextchar c\z@ \else
    \if \@nextchar l\@ne \else
     \if \@nextchar r\tw@ \else
   \z@ \@chclass
   \if\@nextchar |\@ne \else
    \if \@nextchar !6 \else
     \if \@nextchar @7 \else
      \if \@nextchar (8 \else
       \if \@nextchar )9 \else
  10
  \@chnum
  \if \@nextchar m\thr@@\else
   \if \@nextchar p4 \else
    \if \@nextchar b5 \else
   \z@ \@chclass \z@ \@preamerr \z@ \fi \fi \fi \fi
   \fi \fi  \fi  \fi  \fi  \fi  \fi \fi \fi \fi \fi \fi}
% \arraybackslash: make \\ an alias of \@arraycr (globally defined helper).
\gdef\arraybackslash{\let\\=\@arraycr}
% \@textsubscript{<text>}: typeset <text> as a subscript, boxed with \mbox and
% set at size \sf@size.
\def\@textsubscript#1{{\m@th\ensuremath{_{\mbox{\fontsize\sf@size\z@#1}}}}}
% \Panel{<content>}{<colour>}{<columns>}{<column spec>}: a \multicolumn cell
% spanning <columns> columns whose specification is prefixed with
% \columncolor{<colour>}.  The `)' introduces that prefix (see the \@testpach
% redefinition in this file).
\def\Panel#1#2#3#4{\multicolumn{#3}{){\columncolor{#2}}#4}{#1}}
% Inline markup macros that expand to nothing: whatever follows them in the
% source is typeset unchanged.
\def\abbr{}
\def\corr{}
\def\expan{}
\def\gap{}
\def\orig{}
\def\reg{}
% NOTE(review): this overrides LaTeX's cross-reference command \ref.  hyperref
% (loaded later in this preamble) appears to install its own \ref at
% \begin{document}, in which case body constructs such as {\ref 3} would print
% an undefined-reference marker rather than the number -- confirm.
\def\ref{}
\def\sic{}
\def\persName{}\def\name{}
\def\placeName{}
\def\orgName{}
% Font-switching text macros.  \fontspec exists only when the XeTeX branch of
% this preamble loaded fontspec, so the two macros below select their font
% under XeTeX and otherwise fall back to the current font instead of raising
% "Undefined control sequence".
\def\textcal#1{{\ifxetex\fontspec{Lucida Calligraphy}\fi#1}}
\def\textgothic#1{{\ifxetex\fontspec{Lucida Blackletter}\fi#1}}
% Size, decoration and symbol helpers.
\def\textlarge#1{{\large #1}}
\def\textoverbar#1{\ensuremath{\overline{#1}}}
\def\textquoted#1{‘#1’}
\def\textsmall#1{{\small #1}}
\def\textsubscript#1{\@textsubscript{\selectfont#1}}
\def\textxi{\ensuremath{\xi}}
% Title emphasis: switches to italic shape (a declaration, not a command with
% an argument).
\def\titlem{\itshape}
% Lightweight structural environments.  Most add no opening material; the
% closing code ends the current paragraph, either unconditionally (\par) or
% only when TeX is already in vertical mode (\ifvmode\par\fi).
\newenvironment{biblfree}{}{\ifvmode\par\fi }
\newenvironment{bibl}{}{}
% byline: 6pt above, italic, 16pt type on an 18pt baseline.
\newenvironment{byline}{\vskip6pt\itshape\fontsize{16pt}{18pt}\selectfont}{\par }
\newenvironment{citbibl}{}{\ifvmode\par\fi }
% docAuthor: italic; the 4pt skip and 16pt/18pt size apply only when the
% environment starts in vertical mode.
\newenvironment{docAuthor}{\ifvmode\vskip4pt\fontsize{16pt}{18pt}\selectfont\fi\itshape}{\ifvmode\par\fi }
\newenvironment{docDate}{}{\ifvmode\par\fi }
\newenvironment{docImprint}{\vskip 6pt}{\ifvmode\par\fi }
% docTitle: 6pt above, bold, 22pt type on a 25pt baseline.
\newenvironment{docTitle}{\vskip6pt\bfseries\fontsize{22pt}{25pt}\selectfont}{\par }
\newenvironment{msHead}{\vskip 6pt}{\par}
\newenvironment{msItem}{\vskip 6pt}{\par}
\newenvironment{rubric}{}{}
\newenvironment{titlePart}{}{\par }

% Fixed-width column types taking the width as their single argument.
% L/C/R wrap a p-column with \raggedright / \centering / \raggedleft;
% P/B/M are plain p-, b- and m-columns.  Every type applies \arraybackslash
% so that \\ keeps ending the table row.  The leading `)' is the prefix
% specifier recognised by this file's \@testpach redefinition.
\newcolumntype{L}[1]{){\raggedright\arraybackslash}p{#1}}
\newcolumntype{C}[1]{){\centering\arraybackslash}p{#1}}
\newcolumntype{R}[1]{){\raggedleft\arraybackslash}p{#1}}
\newcolumntype{P}[1]{){\arraybackslash}p{#1}}
\newcolumntype{B}[1]{){\arraybackslash}b{#1}}
\newcolumntype{M}[1]{){\arraybackslash}m{#1}}
% Colour `label': 75% grey.
\definecolor{label}{gray}{0.75}
% \unusedattribute{<text>}: <text> in the `label' colour, struck out with
% \sout (provided by ulem, which is loaded further down in this preamble).
\def\unusedattribute#1{\sout{\textcolor{label}{#1}}}
% \xref{<url>}{<text>}: hyperlink <text> to <url>.  \hyper@normalise and
% \hyper@linkurl are hyperref internals (hyperref is loaded later); note that
% \xref@ swaps the argument order when calling \hyper@linkurl.
\DeclareRobustCommand*{\xref}{\hyper@normalise\xref@}
\def\xref@#1#2{\hyper@linkurl{#2}{#1}}
% Underscore handling.  Inside a group `_' is made active and globally defined
% to typeset its argument as an upright (\mathrm) subscript.  Afterwards `_'
% gets math code "8000, so in math mode it behaves like that active character,
% and category code 12, so in text it is an ordinary printable character.
% Consequence: after this point `_' is no longer the plain subscript token;
% \sb remains available for ordinary subscripts.
\begingroup
\catcode`\_=\active
\gdef_#1{\ensuremath{\sb{\mathrm{#1}}}}
\endgroup
\mathcode`\_=\string"8000
\catcode`\_=12\relax

\usepackage[a4paper,twoside,lmargin=1in,rmargin=1in,tmargin=1in,bmargin=1in,marginparwidth=0.75in]{geometry}
\usepackage{framed}

\definecolor{shadecolor}{gray}{0.95}
\usepackage{longtable}
\usepackage[normalem]{ulem}
\usepackage{fancyvrb}
\usepackage{fancyhdr}
\usepackage{graphicx}
\usepackage{marginnote}

% \@cite{<citation list>}{<optional note>}: print the citation list without
% the kernel's surrounding square brackets.  The LaTeX kernel always calls
% \@cite with two arguments and sets \if@tempswa when \cite was given an
% optional note, so both arguments are consumed here; with a one-argument
% definition the note was left behind and typeset as a stray group.
\renewcommand{\@cite}[2]{#1\if@tempswa , #2\fi}


% Margin notes (marginnote package) are set in italic at \footnotesize.
\renewcommand*{\marginfont}{\itshape\footnotesize}

% Extensions tried, in this order, when \includegraphics is given a file name
% without one.
\def\Gin@extensions{.pdf,.png,.jpg,.mps,.tif}

  \pagestyle{fancy}

\usepackage[pdftitle={Dynamic Power Reduction in Modified Lifting Scheme Based DWT for Image Processing},
 pdfauthor={}]{hyperref}
\hyperbaseurl{}

	 \paperwidth210mm
	 \paperheight297mm
              
% Table-of-contents geometry: page-number box width, right margin and leader
% dot separation; entries down to level 3 are listed.
\def\@pnumwidth{1.55em}
\def\@tocrmarg {2.55em}
\def\@dotsep{4.5}
\setcounter{tocdepth}{3}
% Line- and page-breaking parameters (penalties, badness thresholds and
% tolerances) applied document-wide.
\clubpenalty=8000
\emergencystretch 3em
\hbadness=4000
\hyphenpenalty=400
\pretolerance=750
\tolerance=2000
\vbadness=4000
\widowpenalty=10000

% Sectioning commands, redefined through \@startsection.  The arguments are:
% name, level, indent, space before, space after, heading font.  A negative
% "before" value and a negative "after" value carry \@startsection's special
% meanings (see the LaTeX sectioning documentation).
% section: \Large bold.
\renewcommand\section{\@startsection {section}{1}{\z@}%
     {-1.75ex \@plus -0.5ex \@minus -.2ex}%
     {0.5ex \@plus .2ex}%
     {\reset@font\Large\bfseries}}
% subsection: \Large, not bold.
\renewcommand\subsection{\@startsection{subsection}{2}{\z@}%
     {-1.75ex\@plus -0.5ex \@minus- .2ex}%
     {0.5ex \@plus .2ex}%
     {\reset@font\Large}}
% subsubsection: \large.
\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}%
     {-1.5ex\@plus -0.35ex \@minus -.2ex}%
     {0.5ex \@plus .2ex}%
     {\reset@font\large}}
% paragraph: \normalsize, set as a display heading (positive "after" space).
\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}%
     {-1ex \@plus-0.35ex \@minus -0.2ex}%
     {0.5ex \@plus .2ex}%
     {\reset@font\normalsize}}
% subparagraph: bold \normalsize, indented by \parindent, with a negative
% "after" value of -1em.
\renewcommand\subparagraph{\@startsection{subparagraph}{5}{\parindent}%
     {1.5ex \@plus1ex \@minus .2ex}%
     {-1em}%
     {\reset@font\normalsize\bfseries}}


% \l@section{<title>}{<page>}: table-of-contents line for a section.  Adds
% vertical space, then sets the title in bold with the page number
% right-aligned in a box of width \@pnumwidth (no dot leaders).
\def\l@section#1#2{\addpenalty{\@secpenalty} \addvspace{1.0em plus 1pt}
 \@tempdima 1.5em \begingroup
 \parindent \z@ \rightskip \@pnumwidth 
 \parfillskip -\@pnumwidth 
 \bfseries \leavevmode #1\hfil \hbox to\@pnumwidth{\hss #2}\par
 \endgroup}
% Lower levels use dotted lines; the arguments of \@dottedtocline are
% level, indent and number width.
\def\l@subsection{\@dottedtocline{2}{1.5em}{2.3em}}
\def\l@subsubsection{\@dottedtocline{3}{3.8em}{3.2em}}
\def\l@paragraph{\@dottedtocline{4}{7.0em}{4.1em}}
\def\l@subparagraph{\@dottedtocline{5}{10em}{5em}}
% Create the section and chapter counters only if they do not exist yet, so
% the matter-switching macros below can set both.
\@ifundefined{c@section}{\newcounter{section}}{}
\@ifundefined{c@chapter}{\newcounter{chapter}}{}
% \if@mainmatter is declared and initialised to true.
\newif\if@mainmatter 
\@mainmattertrue
\def\chaptername{Chapter}
% \frontmatter: roman page numbers, roman chapter and section numbers (with
% matching \theH... forms for hyperref anchors) and an empty \@chapapp.
\def\frontmatter{%
  \pagenumbering{roman}
  \def\thechapter{\@roman\c@chapter}
  \def\theHchapter{\roman{chapter}}
  \def\thesection{\@roman\c@section}
  \def\theHsection{\roman{section}}
  \def\@chapapp{}%
}
% \mainmatter: start on a fresh right-hand page, reset the chapter and section
% counters, switch page, chapter and section numbering to arabic, number
% headings down to level 6 and set \@chapapp to \chaptername.
\def\mainmatter{%
  \cleardoublepage
  \def\thechapter{\@arabic\c@chapter}
  \setcounter{chapter}{0}
  \setcounter{section}{0}
  \pagenumbering{arabic}
  \setcounter{secnumdepth}{6}
  \def\@chapapp{\chaptername}%
  \def\theHchapter{\arabic{chapter}}
  \def\thesection{\@arabic\c@section}
  \def\theHsection{\arabic{section}}
}
% \backmatter: start on a fresh right-hand page, reset the chapter and section
% counters, number headings down to level 2, letter the chapters (\@Alph),
% set \@chapapp to \appendixname and finally invoke \appendix.
\def\backmatter{%
  \cleardoublepage
  \setcounter{chapter}{0}
  \setcounter{section}{0}
  \setcounter{secnumdepth}{2}
  \def\@chapapp{\appendixname}%
  \def\thechapter{\@Alph\c@chapter}
  \def\theHchapter{\Alph{chapter}}
  \appendix
}
% bibitemlist{<widest label>}: a numbered reference list without a heading.
% Labels are produced by \@biblabel from the enumiv counter; the label width
% is measured from <widest label>.  Inside the list, \sloppy is active, club
% and widow penalties are 4000 and the space factor of `.' is set to \@m.
% An empty list produces a warning instead of LaTeX's missing-item error.
\newenvironment{bibitemlist}[1]{%
   \list{\@biblabel{\@arabic\c@enumiv}}%
       {\settowidth\labelwidth{\@biblabel{#1}}%
        \leftmargin\labelwidth
        \advance\leftmargin\labelsep
        \@openbib@code
        \usecounter{enumiv}%
        \let\p@enumiv\@empty
        \renewcommand\theenumiv{\@arabic\c@enumiv}%
	}%
  \sloppy
  \clubpenalty4000
  \@clubpenalty \clubpenalty
  \widowpenalty4000%
  \sfcode`\.\@m}%
  {\def\@noitemerr
    {\@latex@warning{Empty `bibitemlist' environment}}%
    \endlist}

% \tableofcontents: unnumbered section headed \contentsname, followed by the
% contents read from the .toc file.
\def\tableofcontents{\section*{\contentsname}\@starttoc{toc}}
% Paragraphs: no extra vertical space, 1em first-line indent.
\parskip0pt
\parindent1em
% NOTE(review): this repeats, token for token, the \Panel definition given
% earlier in this preamble; the repetition has no effect.
\def\Panel#1#2#3#4{\multicolumn{#3}{){\columncolor{#2}}#4}{#1}}
% reflist: ragged-right list with italic item labels.  Vertical padding is
% removed (\topsep, \itemsep, \parskip = 0pt; \parsep = 2pt) and the right
% margin is 0.25in.
\newenvironment{reflist}%
  {\begin{raggedright}%
   \begin{list}{}{%
     \def\makelabel##1{\itshape ##1}%
     \setlength{\parskip}{0pt}%
     \setlength{\parsep}{2pt}%
     \setlength{\topsep}{0pt}%
     \setlength{\itemsep}{0pt}%
     \setlength{\itemindent}{0pt}%
     \setlength{\rightmargin}{0.25in}%
   }}%
  {\end{list}\end{raggedright}}
% sansreflist: same layout as a ragged-right list with zero \topsep, \itemsep
% and \parskip, 2pt \parsep and a 0.25in right margin, but item labels are set
% upright (\upshape).
\newenvironment{sansreflist}%
  {\begin{raggedright}%
   \begin{list}{}{%
     \def\makelabel##1{\upshape ##1}%
     \setlength{\parskip}{0pt}%
     \setlength{\parsep}{2pt}%
     \setlength{\topsep}{0pt}%
     \setlength{\itemsep}{0pt}%
     \setlength{\itemindent}{0pt}%
     \setlength{\rightmargin}{0.25in}%
   }}%
  {\end{list}\end{raggedright}}
% specHead{<label>}{<title>}: heading block made of a rule with 20pt above and
% 10pt below, a hyperlink anchor labelled <label>, a right running head and a
% level-2 PDF bookmark for <title>, and <title> itself in bold 16pt/18pt type
% shifted 0.75in to the left.  The blank line inside the definition ends the
% paragraph before the bookmark is written.
\newenvironment{specHead}[2]%
 {\vspace{20pt}\hrule\vspace{10pt}%
  \phantomsection\label{#1}\markright{#2}%

  \pdfbookmark[2]{#2}{#1}%
  \hspace{-0.75in}{\bfseries\fontsize{16pt}{18pt}\selectfont#2}%
  }{}
      % Document metadata: dates, identifier, title and author.
      \def\TheFullDate{2012-03-15 (revised: 15 March 2012)}
% \TheID is printed in the page footers.  This document has no identifier, so
% the macro is empty; it previously expanded to \makeatother, which printed
% nothing but changed the category code of `@' every time a footer was built.
\def\TheID{}
\def\TheDate{2012-03-15}
\title{Dynamic Power Reduction in Modified Lifting Scheme Based DWT for Image Processing}
% `@' stays a letter for the internal macros that follow.
\author{}\makeatletter 
\makeatletter
% \cleartoleftpage: flush floats and end the page; in two-sided mode, if the
% page counter is then odd, emit an empty page (two in two-column mode, one
% per column) so that what follows starts on an even-numbered page.
\newcommand*{\cleartoleftpage}{%
  \clearpage
    \if@twoside
    \ifodd\c@page
      \hbox{}\newpage
      \if@twocolumn
        \hbox{}\newpage
      \fi
    \fi
  \fi
}
\makeatother
\makeatletter
% Running heads: the title on both sides, then title (left) / author (right).
% NOTE(review): these are issued in the preamble, before \begin{document};
% confirm they take effect as intended.
\thispagestyle{empty}
\markright{\@title}\markboth{\@title}{\@author}
% \small: 9pt type on an 11pt baseline, with matching display-math skips and
% tighter first-level list spacing.
\renewcommand\small{\@setfontsize\small{9pt}{11pt}\abovedisplayskip 8.5\p@ plus3\p@ minus4\p@
\belowdisplayskip \abovedisplayskip
\abovedisplayshortskip \z@ plus2\p@
\belowdisplayshortskip 4\p@ plus2\p@ minus2\p@
\def\@listi{\leftmargin\leftmargini
               \topsep 2\p@ plus1\p@ minus1\p@
               \parsep 2\p@ plus\p@ minus\p@
               \itemsep 1pt}
}
\makeatother
% Verbatim blocks (fancyvrb): framed, blank lines unnumbered, 5mm side margins.
\fvset{frame=single,numberblanklines=false,xleftmargin=5mm,xrightmargin=5mm}
% Page style `fancy': clear all fields, then set them individually.
\fancyhf{} 
\setlength{\headheight}{14pt}
% Heads: bold left mark on even pages (left), bold right mark on odd pages
% (right).
\fancyhead[LE]{\bfseries\leftmark} 
\fancyhead[RO]{\bfseries\rightmark} 
% Feet: page number centred on all pages; \TheID on the left of odd pages and
% on the right of even pages.
\fancyfoot[RO]{}
\fancyfoot[CO]{\thepage}
\fancyfoot[LO]{\TheID}
\fancyfoot[LE]{}
\fancyfoot[CE]{\thepage}
\fancyfoot[RE]{\TheID}
% Link borders in 75% grey; section numbers included in PDF bookmarks.
\hypersetup{citebordercolor=0.75 0.75 0.75,linkbordercolor=0.75 0.75 0.75,urlbordercolor=0.75 0.75 0.75,bookmarksnumbered=true}
% Page style `plain': no header text and no header rule.
\fancypagestyle{plain}{\fancyhead{}\renewcommand{\headrulewidth}{0pt}}

\date{}
\usepackage{authblk}

% \keywords{<list>}: typeset "Index terms---" in bold italic followed by
% <list>, as a paragraph of its own in \footnotesize.
% The size change is confined to a group and the paragraph is ended inside
% that group, so the reduced size and its line spacing apply to the keywords
% only.  Previously \footnotesize was issued ungrouped and remained in force
% for all text after the call.
\providecommand{\keywords}[1]{%
  \par
  {\footnotesize
   \textbf{\textit{Index terms---}} #1\par}%
}

\usepackage{graphicx,xcolor}
% Journal colour palette (HTML hex values).
\definecolor{GJBlue}{HTML}{273B81}
\definecolor{GJLightBlue}{HTML}{0A9DD9}
\definecolor{GJMediumGrey}{HTML}{6D6E70}
\definecolor{GJLightGrey}{HTML}{929497} 

% abstract: unindented, ragged-right text framed above and below by a 2pt
% grey rule of width \textwidth; the text is introduced by \abstractname in
% large bold blue type.
\renewenvironment{abstract}{%
   \setlength{\parindent}{0pt}\raggedright
   \textcolor{GJMediumGrey}{\rule{\textwidth}{2pt}}
   \vskip16pt
   \textcolor{GJBlue}{\large\bfseries\abstractname\space}
}{%   
   \vskip8pt
   \textcolor{GJMediumGrey}{\rule{\textwidth}{2pt}}
   \vskip16pt
}

\usepackage[absolute,overlay]{textpos}

\makeatother 
      \usepackage{lineno}
      \linenumbers
      
\begin{document}

             \author[1]{Dr.  C.Chandrasekhar}

             \author[2]{Dr. S.Narayana  Reddy}

             \affil[1]{  S.V.University, Tirupathi.}

\renewcommand\Authands{ and }

\date{\small \em Received: 11 February 2012 Accepted: 2 March 2012 Published: 15 March 2012}

\maketitle


\begin{abstract}
        


Image compression is one of the major applications in image processing that imposes greater design challenges for VLSI design engineers in design and development of low power and high speed architectures. DWT is used in image compression for transformation of image from spatial to frequency domain. In this paper, DWT architecture based on lifting scheme is considered and dynamic power reduction is achieved with suitable modifications to the architecture and adoption of low power techniques. The interdependency of scaling and dilation coefficients is simplified to single hierarchy and thus reduces latency and increases throughput. Wallace tree multiplier and carry select adder are used in realizing 1D DWT architecture. The hierarchy in the design enables the adoption of multi-stage and hierarchical clock gating techniques, thus reducing dynamic power. Power gating and DVFS techniques are also adopted to optimize power dissipation. The modified lifting architecture operates at a maximum frequency of 290\,MHz, and reduces power by more than 50\%. The proposed design is implemented using 65\,nm TSMC low power library cells and is synthesized using Synopsys DC. The TCL scripts developed optimize dynamic power dissipation.

\end{abstract}


\keywords{Dynamic power dissipation, DWT, Lifting Scheme, Hierarchical design, low power design ASIC implementation.}

\begin{textblock*}{18cm}(1cm,1cm) % {block width} (coords) 
\textcolor{GJBlue}{\LARGE Global Journals \LaTeX\ JournalKaleidoscope\texttrademark}
\end{textblock*}

\begin{textblock*}{18cm}(1.4cm,1.5cm) % {block width} (coords) 
\textcolor{GJBlue}{\footnotesize \\ Artificial Intelligence formulated this projection for compatibility purposes from the original article published at Global Journals. However, this technology is currently in beta. \emph{Therefore, kindly ignore odd layouts, missed formulae, text, tables, or figures.}}
\end{textblock*}


\let\tabcellsep& 	 	 		 
\section[{Introduction}]{Introduction}\par
DWT is recommended by JPEG2000 standards as it supports features like progressive transmission, higher compression and region of interest encoding schemes. Convolution based DWT or FIR filter bank based DWT architectures occupy large area as they require a larger number of multipliers and adders, thus making the computations complex and time consuming. Mobile phones and other similar hand held devices that support image/video applications demand high speed and low power architectures with reduced memory size for DWT processing. There are several architectures discussed in literature to perform lifting based DWT. The general approach for 2-D DWT is to apply the 1-D DWT row-wise, which produces L and H sub-bands, and then process these sub-bands column-wise to get LL, LH, HL and HH coefficients. Several architectures like direct mapped \hyperref[b1]{[2]}, folded \hyperref[b2]{[3]}, and flipping \hyperref[b3]{[4]} for single level and multi-level DWT have been proposed to implement 1-D lifting DWT. Many\footnote{Author: HOD, Dept.\ of ECE, SVCET, Chittoor; Prof.\ \& Head, Dept.\ of ECE, S.V.\ University, Tirupathi. E-mail: snreddysvu@yahoo.com} architectures that implement the Two-Dimensional separable Forward (2D-DWT) and Inverse DWT (2D-IDWT) in order to be applied on 2D signals have been presented in the past \hyperref[b4]{[5]}, \hyperref[b5]{[6]}, \hyperref[b6]{[7]} and \hyperref[b7]{[8]}. These architectures consist of filters for performing the 1D-DWT and memory units for storing the results of the transformation. Because streaming multimedia applications---in which the DWT is present---are characterized by high throughput requirements, the design of the filters needs to be optimized in terms of speed. Moreover, portable multimedia devices require low power consumption for increasing the battery lifetime and this can be achieved by minimizing the storage size and number of memory accesses \hyperref[b8]{[9]}. 
Low power DWT architectures based on pipelining and parallel processing have been discussed in \hyperref[b9]{[10]} and \hyperref[b10]{[11]}; in their work low power is achieved by modifying the architecture to reduce the number of computations, and the design was implemented on FPGA. Many of the low power techniques reported in literature \hyperref[b11]{[12]}, \hyperref[b12]{[13]}, \hyperref[b13]{[14]} and \hyperref[b14]{[15]} for DWT propose modifications at the architecture level to reduce power dissipation. Power reduction can be accomplished at various levels of abstraction starting from architecture level to circuit level. Power reduction at the sub system level or at the circuit level can be accomplished when ASIC design of DWT architecture is performed. Many of the works reported in literature are restricted to FPGA implementation. In this paper, in order to demonstrate the dynamic power reduction techniques at various levels of abstraction, DWT architecture is considered as a test case for illustration. ASIC design of DWT architecture optimizing dynamic power reduction using 65\,nm TSMC libraries is performed.\par
Section II discusses wavelet transforms, DWT architecture and dynamic low power reduction techniques. Section III discusses proposed low power schemes for design DWT architecture sub systems. Section IV presents ASIC implementation of DWT architecture based on low power schemes. Section V discusses implementation results and performance comparison and section VI presents conclusion. 
\section[{a) DWT and Low Power Schemes}]{a) DWT and Low Power Schemes}\par
In this section, DWT architecture and low power schemes are presented. Lifting scheme based DWT architecture is considered as test case for dynamic power reduction and is briefly discussed in this section. 
\section[{i. DWT architecture}]{i. DWT architecture}\par
In wavelet analysis, signals are represented using a set of basis functions derived by shifting and scaling a single prototype function, referred to as ``mother wavelet'', in time \hyperref[b15]{[16]}. Wavelet transforms are closely related to tree structured digital filter banks and multiresolution analysis. A set of wavelet basis functions can be generated by translating and dilating the mother wavelet. A number of architectures have been proposed for calculation of DWT \hyperref[b1]{[2]}, \hyperref[b2]{[3]}, \hyperref[b3]{[4]}, \hyperref[b4]{[5]} and \hyperref[b5]{[6]}. The architectures are mostly folded and can be broadly classified into serial architectures (where the inputs are supplied to the filters in a serial manner) and parallel architectures (where the inputs are supplied to the filters in a parallel manner). A methodology for implementing lifting-based DWT that reduces the memory requirements and communication between the processors, when the input is broken up into blocks, is presented in \hyperref[b16]{[17]}. In Figure \hyperref[fig_0]{1} \hyperref[b16]{[17]}, the $z^{-1}$ blocks are for delay, $\alpha$, $\beta$, $\gamma$, $\delta$, $\zeta$ are the lifting coefficients and the shaded blocks are registers. The 9/7 filter has been used for implementation, which requires four steps for lifting and one step for scaling. The input signal $x\sb{i}$ is split into two parts, the even part $x\sb{2i}$ and the odd part $x\sb{2i+1}$; the first step of lifting performed is then given by the equations \hyperref[b16]{[17]}:
% \sb is used for subscripts because this preamble redefines the meaning of `_'.
\begin{equation}
d\sb{i}^{1} = \alpha\,(x\sb{2i} + x\sb{2i+2}) + x\sb{2i+1}
\end{equation}
\begin{equation}
a\sb{i}^{1} = \beta\,(d\sb{i}^{1} + d\sb{i-1}^{1}) + x\sb{2i}
\end{equation}
\par
Then the second lifting step performed gives:
\begin{equation}
d\sb{i}^{2} = \gamma\,(a\sb{i}^{1} + a\sb{i+1}^{1}) + d\sb{i}^{1}
\end{equation}
\begin{equation}
a\sb{i}^{2} = \delta\,(d\sb{i}^{2} + d\sb{i-1}^{2}) + a\sb{i}^{1}
\end{equation}
\par
Then scaling is performed and the following equations are obtained:
\begin{equation}
a\sb{i} = \zeta\, a\sb{i}^{2}
\end{equation}
\begin{equation}
d\sb{i} = d\sb{i}^{2} / \zeta
\end{equation}
\par
The predict step helps determine the correlation between the sets of data and predicts even data samples from odd. These samples are used in the update step for updating the present phase. Some of the properties of the original input data can be maintained in the reduced set also by construction of a new operator using the update step. The lifting coefficients have constant values of $-1.58613$, $-0.0529$, $0.882911$, $0.44350$, $-1.1496$ for $\alpha$, $\beta$, $\gamma$, $\delta$, $\zeta$ respectively. $a\sb{i}$ and $d\sb{i}$ are the DWT outputs after level 1 decomposition.\par
ii. Sources of power dissipation in CMOS VLSI circuits\par Power consumption in CMOS digital circuits is divided into two major components (static and dynamic) as shown in Figure \hyperref[fig_3]{2(a)}. Static power is due to leakage current and short circuit current; dynamic power is due to switching current. Power dissipation in CMOS increases exponentially with scaling in transistor size. Figure \hyperref[fig_3]{2(b)} shows the power dissipation in CMOS with technology scaling. Dynamic power dissipation was dominant at the 250\,nm technology node; with technology scaling towards lower geometries (65\,nm and below), leakage power has significantly increased. However, dynamic power has also increased exponentially; this is due to the increase in switching current and frequency of operation of CMOS circuits. There are various low power reduction techniques such as \hyperref[b17]{[18]}    
\section[{Global Journal of Researches in Engineering}]{Global Journal of Researches in Engineering} 
\section[{Subsystem Designs for Dwt Architecture}]{Subsystem Designs for Dwt Architecture}\par
An adder is the most commonly used arithmetic block in the Central Processing Unit (CPU) of a microprocessor, a Digital Signal Processor (DSP), and even in a variety of ASICs. In a DWT processor, adder is one of the important building blocks, required to compute the DWT coefficients of input signal. Multiplier used in a DWT processor also requires adder to add the partial products. Hence, design and analysis of adder is considered in this section. Speed and optimization of power of an adder is significant, to improve the overall performance of the system. But an adder also experiences the power-delay trade off. That is, its power dissipation increases with reduction in delay and vice versa. There are various architectures for adder design. 4-bit adders can be of different types. Some of those are Carry look Ahead Adder, Ripple Carry Adder, Carry Save Adder, Carry Select Adder. In many digital signal processing operations-such as correlations, convolutions, filtering, and frequency analysis-one needs to perform multiplication. Multiplication algorithms will be used to illustrate methods of designing different cells so that they fit into a larger structure. In order to introduce these designs, simple and serial and parallel multipliers will be introduced. High-speed parallel multipliers are becoming one of the keys in RISCs (Reduced Instruction Set Computers), DSPs (Digital Signal Processors), and graphics accelerators and so on. Parallel multipliers are used in data processor as well as in digital signal processors. There are various multiplier architectures reported in literature, Wallace tree, booths multiplier, BZ-FAD multiplier, Shift and Add multiplier and Array multiplier are most popular for DSP applications. In this work, the adders and multipliers are modeled using HDL and is synthesized using TSMC 65nm CMOS libraries using Synopsys DC. The synthesis results generate reports that provide information on area, delay and power dissipation. 
The results obtained without low power techniques are presented in Table 1 and Table 2. Multipliers are designed using carry save adders.  In order to reduce power dissipation of adder and multiplier, the multi-$V\sb{DD}$ technique is adopted. Reducing the $V\sb{DD}$ supply voltage reduces the power consumption, with no effect on area. From the results obtained it is found that power consumption is a quadratic function of voltage ($\mathrm{Power} = f\,C\,V\sb{DD}^{2}$). A decrease in supply voltage increases the overall delay ($\mathrm{Delay} = K\,V\sb{DD} / (V\sb{DD} - V\sb{t})^{\alpha}$).\par
The synthesis results generate reports that provide information on area, delay and power dissipation. The results obtained without low power techniques are presented in Table 1 and Table 2. Multipliers are designed using carry save adders. Lifting equations presented in (1)--(6) when realized using HDL model is a sequential process, as the scaling factors and are dependent on previous samples, thus introducing latency. In order to increase throughput and reduce latency, modified equations are derived. The modified lifting equations eliminate dependency of outputs on previous samples. We have obtained the equations for $a\sb{i}$ and $d\sb{i}$ by substituting (4) in (3), (3) in (2) and so on. The lifting coefficients were substituted and the results were scaled by multiplying with 256 to avoid decimals and to round off the values. The modified lifting scheme equations are: These equations are obtained by taking coefficients as common. The equations have initial latency, as the input samples need to be stored before the DWT $a\sb{i}$ and $d\sb{i}$ coefficients are computed.? a i = 294* (8(6*x 2i +4*x 2i-2 +x 2i +4+x 2i+4 +x 2i-4 +4*x 2i+2\par
The design of low power architecture to reduce dynamic power dissipation is based on equations \hyperref[b6]{(7)} and \hyperref[b7]{(8)}. From the equation the following are the observations made: The proposed architecture shown in Figure \hyperref[fig_7]{4} takes two inputs and gives two outputs per cycle. Data1 and Data2 are the odd and even input samples given to hardware in single clock for 100 \% hardware utilization. This architecture is very simple design as compared to other architectures suggested in \hyperref[b19]{[20]} which have complex control path to achieve 100\% hardware utilization. The row processor and column processor shown in figure \hyperref[fig_7]{4} are realized using modified lifting scheme based equations.\par
Figure 3: Row processor and column processor for modified lifting DWT. Based on the architecture shown in Figure 3 and equations presented in \hyperref[b6]{(7)} and (8), the top level model for the architecture is shown in Figure \hyperref[fig_7]{4}. A detailed data flow for the proposed architecture is presented in the The modified architecture derived consists of the following blocks: parallel input and serial output register, serial input and parallel output register, multiplier and adders and control unit. The HDL model is developed and the design is verified for its functionality using a test bench in ModelSim. The functionally correct HDL code is synthesized using Synopsys DC targeting TSMC 65\,nm library and technology files. The reports obtained are compiled and presented in Table \hyperref[tab_4]{4}. From the results obtained and tabulated in Table \hyperref[tab_4]{4}, it is found that due to changes in architecture that reduce the number of stages in DWT computation, the dynamic power dissipation is reduced by 37\%. However, the area is increased due to the increase in registers and intermediate storage units; the design is synthesized to obtain minimum delay and zero slack requirement. Due to architectural changes it is demonstrated that dynamic power is reduced by 37\%. In order to further reduce power dissipation various other dynamic low power techniques are introduced for optimization. The simplest, general (or automatic) clock gating inserts a single clock gate for each register bank. Most tools permit the user to ``split'' register banks or to prevent clock gate ``sharing'' across unrelated register banks. To save even more dynamic power, advanced clock gating styles such as multi-stage and hierarchical can be used, depending on design architecture and design requirements. The modified lifting DWT has common coefficients that need to be enabled at different instants of time, and hence the multi-stage clock gating technique is implemented. 
The 2D DWT architecture is realized using sub systems (multipliers, adders and registers), 1D DWT and finally 2D DWT, in order to reduce power dissipation hierarchical clock gating technique is adopted. Figure \hyperref[fig_5]{5} shows the multistage clock gating technique introduced into the row processor. Enable adder enables all adders together, similarly the enable reg enables all intermediate registers, thus saving power.  In order to implement power gating technique power gates and state retention register required. Power gating cells are required for turning blocks on and off. State retention registers in their turn are useful because, if the state of a shut down or "sleeping" block needs to be retained the most automated method to retain the state is the use of retention registers. These registers have a backup power supply connection that remains always on to hold the state of the register via a high voltage threshold latch built into the register. An isolation cell is required to ensure electrical and logical isolation of logic that is shut down from active logic in a design. The reason this is required is because when a block is shut down the internal signal level will transition to an unknown, floating state. Also always on cells are required between switched and steady state blocks to ensuring interoperability. Figure \hyperref[fig_11]{7} shows the power gating logic for dynamic power reduction. Multiple voltages are used to drive the cells that are active or in standby. In the hierarchical design shown in Figure \hyperref[fig_10]{6}, 1D DWT are active during computation and inactive during data storage, thus power gating techniques are inserted. The most common approach to provide state retention during power gating is to replace a standard register with a retention register. To achieve further improvements in power reduction without resorting to custom circuit techniques, Dynamic Voltage and Frequency Scaling can be used. 
Dynamic Voltage and Frequency Scaling is effective because of the following two facts:\par
\textbullet\ The amount of energy required to complete a task is proportional to the square of the supply voltage.\par
\textbullet\ The maximum frequency of any CMOS circuit is proportional to the supply voltage.\par
So if the supply voltage is decreased there is a square-law reduction in energy to complete a given task. However the task takes longer to complete because of the linear reduction in frequency. Therefore, the principle gain with Dynamic Voltage and Frequency Scaling is with respect to dynamic power consumption.\par
Dynamic voltage and frequency scaling adjusts performance and energy consumption levels while the logic circuit is active. It is required to reduce processor frequency and voltage to obtain quadratic energy savings. DVFS is an effective way of reducing the CPU energy consumption by providing computation power.\par
DVFS technique has been proven to be a highly effective technique for power minimization subject to a performance constraint. DVFS should consider not only the CPU power, but also the total system power dissipation. In this work, to realize 2D DWT, multiple 1D DWT architecture is realized using modified lifting scheme logic. Thus DVFS is adopted to minimize power dissipation.\par
DVFS computation for modified lifting DWT: Workload of a task, W task , is defined as the total number of clock cycles required to compute 1D DWT.  
\section[{Asic Implementation and Result Analysis}]{Asic Implementation and Result Analysis}\par
The simulation results for modified DWT are presented in this section. There are sixty four inputs, each having bit width of twenty bits. These inputs are serially sent to the DWT architecture. The DWT consists of registers, multiplexer, adder and multiplier. Whenever the inputs are sent through SIPO (serial input parallel output), the data has been divided into even data and odd data. The even data and odd data are stored in the temporary registers. When the reset is high, the temporary register value consists of zero, whenever the reset is low, the input data is split into the even data and odd data. The input data is read up to sixty four clock cycles, after that the data read according to the lifting scheme. The output data consists of low pass and high pass elements. This is the 1-D discrete wavelet    
\section[{Implementation Results and Discussion}]{Implementation Results and Discussion}\par
In this work, ASIC design flow is restricted to synthesis only for the modified lifting DWT, thus low power libraries and low power IPS from Synopsys design ware are adopted for synthesis. The synthesis constraint file is set for low power synthesis, the Synopsys DC constraints are:\par
transform. The two level discrete wavelet transform is  The constraints are set according to the command set in the file above. The low power constraints are supported only if the RTL is hierarchical and is parallel in nature. The constraints file is shown in below. The constraints for dynamic power reduction discussed earlier are set in a constraints file and are used for synthesis. The TCL scripts for DWT\textunderscore TOP\textunderscore MODULE are presented below and are used for synthesis. Figure 10 shows the synthesis netlist obtained using 65nm technology and the interconnections used in the design along with clock tree network. Figure \hyperref[fig_0]{11} shows the synthesized netlist along with clock tree network.\par
The RTL model developed for the modified lifting scheme based DWT architecture is remodeled for ASIC implementation. The design is synthesized using Design Compiler and timing analysis is carried out using PrimeTime. The design requires 42 input-output ports and 550 cells. The total combinational area is 21527.410 sq.\ $\mu$m and the non-combinational area is 10256.23 sq.\ $\mu$m. The total dynamic power is 498.36\,$\mu$W. Due to the low-power techniques adopted, the dynamic power dissipation is reduced by 19\%. From the results obtained, the design of the architecture achieves 37\% power reduction; the low-power techniques presented in this section reduce power dissipation by 17\%. Thus the maximum power reduction is achieved at the architecture abstraction level. The power saving achieved at various levels of hierarchy is demonstrated in this work. Power reduction needs to be performed from the architecture level down to the circuit level, as illustrated in this work.\par
% V. (stray section numeral left over from text extraction; the section heading follows)
\section[{Conclusion}]{Conclusion}\par
In this work, a modified lifting based DWT is proposed, designed and implemented using the 65\,nm TSMC low-power design library. Lifting based DWT is considered to illustrate the techniques that can be adopted to reduce dynamic power. Modifications at the architecture level as well as at different abstraction levels are considered for power reduction. Low-power library cells from Synopsys DesignWare are considered for synthesis. TCL scripts for constraining the design to reduce dynamic power dissipation are developed. The RTL model developed is synthesized and its performance is estimated. From the results obtained it is found that there is a total of 50\% power reduction as compared with direct implementation. The developed low-power techniques can be applied to other complex designs. Power dissipation can be reduced further at the physical design stage.   
\section[{VI.}]{VI.} 
\section[{Parameters}]{Parameters}\begin{figure}[htbp]
\noindent\textbf{1}\includegraphics[]{image-2.png}
\caption{\label{fig_0}Figure 1 :}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-3.png}
\caption{\label{fig_1}}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-4.png}
\caption{\label{fig_2}}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{2}\includegraphics[]{image-5.png}
\caption{\label{fig_3}Figure 2 :}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-6.png}
\caption{\label{fig_4}}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{5}\includegraphics[]{image-7.png}
\caption{\label{fig_5}}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-8.png}
\caption{\label{fig_6}}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{4}\includegraphics[]{image-9.png}
\caption{\label{fig_7}Figure 4 :}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-10.png}
\caption{\label{fig_8}}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{5}\includegraphics[]{image-11.png}
\caption{\label{fig_9}Figure 5 :}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{6}\includegraphics[]{image-12.png}
\caption{\label{fig_10}Figure 6 :}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{7}\includegraphics[]{image-13.png}
\caption{\label{fig_11}Figure 7 :}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-14.png}
\caption{\label{fig_12}}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-15.png}
\caption{\label{fig_13}}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{8}\includegraphics[]{image-16.png}
\caption{\label{fig_14}Figure 8 :}\end{figure}
     \begin{figure}[htbp]
\noindent\textbf{1} \par 
\begin{longtable}{}
\end{longtable} \par
 
\caption{\label{tab_1}Table 1 :}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{2} \par 
\begin{longtable}{}
\end{longtable} \par
 
\caption{\label{tab_2}Table 2 :}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{3} \par 
\begin{longtable}{P{0.4322857142857143\textwidth}P{0.019428571428571427\textwidth}P{0.15057142857142855\textwidth}P{0.18942857142857142\textwidth}P{0.05828571428571429\textwidth}}
Type of\tabcellsep \tabcellsep No. of\tabcellsep \multicolumn{2}{l}{Power -Delay}\\
adder (16 -\tabcellsep bit)\tabcellsep \multicolumn{2}{l}{transistors $\mu$W}\tabcellsep -ps\\
Ripple carry\tabcellsep \tabcellsep 286\tabcellsep 40.5505\tabcellsep 600\\
adders\tabcellsep \tabcellsep \tabcellsep \\
Carry save\tabcellsep \tabcellsep 92\tabcellsep 18.9241\tabcellsep 74\\
adder\tabcellsep \tabcellsep \tabcellsep \\
Carry select\tabcellsep \tabcellsep 102\tabcellsep 16.897\tabcellsep 65\\
adder\tabcellsep \tabcellsep \tabcellsep \\
Carry look\tabcellsep \tabcellsep 621\tabcellsep 55.1482\tabcellsep 62\\
\multicolumn{2}{l}{ahead adder}\tabcellsep \tabcellsep \end{longtable} \par
 
\caption{\label{tab_3}Table 3 :}\end{figure}
 \begin{figure}[htbp]
\noindent\textbf{4} \par 
\begin{longtable}{P{0.85\textwidth}}
DWT\end{longtable} \par
 
\caption{\label{tab_4}Table 4 :}\end{figure}
 			\footnote{© 2012 Global Journals Inc. (US) Dynamic Power Reduction in Modified Lifting Scheme Based DWT for Image Processing} 			\footnote{© 2012 Global Journals Inc. (US)} 		 		\backmatter   			 
\subsection[{Acknowledgement}]{Acknowledgement}\par
The authors would like to acknowledge Dr.~Cyril Prasanna Raj P for his valuable support and guidance extended in the completion of this work. 			  			  				\begin{bibitemlist}{1}
\bibitem[Wu and Lin]{b9}\label{b9} 	 		\textit{A High-Performance and Memory-Efficient},  		 			Bing-Fei Wu 		,  		 			Chung-Fu Lin 		.  		 	 
\bibitem[Lai et al. (2009)]{b14}\label{b14} 	 		‘A High-performance and Memory-Efficient VLSI Architecture with Parallel Scanning method for 2-D Lifting-Based Discrete Wavelet Transform’.  		 			Yeong-Kang Lai 		,  		 			Lien-Fei Chen 		,  		 			Yui-Chih Shih 		.  	 	 		\textit{IEEE Transaction on Consumer Electronics}  		May 2009. 55  (2) .  	 
\bibitem[Acharya and Chakrabarti ()]{b16}\label{b16} 	 		‘A Survey on Lifting-based Discrete Wavelet Transform Architectures’.  		 			Tinku Acharya 		,  		 			Chaitali Chakrabarti 		.  	 	 		\textit{Journal of VLSI Signal Processing}  		2006. 42 p. .  	 
\bibitem[Chakrabarti et al. ()]{b5}\label{b5} 	 		‘Architectures for wavelet transforms: A survey’.  		 			C Chakrabarti 		,  		 			M Vishwanath 		,  		 			R M Owens 		.  	 	 		\textit{Journal of VLSI Signal Processing}  		1996. 4  (2)  p. .  	 
\bibitem[Catthoor et al. ()]{b8}\label{b8} 	 		\textit{Custom Memory Management Methodology -Exploration of Memory management Organization for Embedded Multimedia System Design},  		 			F Catthoor 		,  		 			S Wuytack 		,  		 			E De Greff 		,  		 			F Balasa 		,  		 			L Nachtergale 		,  		 			A Vandecappele 		.  		1998. Kluwer Academic Publishers.  	 
\bibitem[Cyril Prasanna Raj P, Low power DWT for image compression SASTech Journal ()]{b11}\label{b11} 	 		‘Cyril Prasanna Raj P, Low power DWT for image compression’.  	 	 		\textit{SASTech Journal}  		2008. 7 p. .  	 
\bibitem[Liu et al. ()]{b1}\label{b1} 	 		‘Design and Implementation of a Progressive Image Coding Chip Based on the Lifted Wavelet Transform’.  		 			C C Liu 		,  		 			Y H Shiau 		,  		 			J M Jou 		.  	 	 		\textit{Proc. of the 11th VLSI Design/CAD Symposium},  				 (of the 11th VLSI Design/CAD SymposiumTaiwan)  		2000.  	 
\bibitem[Shanthala et al.]{b18}\label{b18} 	 		‘Design and VLSI implementation of Pipelined Multiply Accumulate Unit’.  		 			Cyril S Prasanna Shanthala 		,  		 			P Raj 		,  		 			Dr S Y Kulkarni 		.  	 	 		\textit{was presented at International Conference on Emerging Trends in Engineering and Technology (ICETET 09) during 16th -18th December 2009 at G.H. Raisoni College of Engineering},  				 (Nagpur (Maharashtra)  		 	 
\bibitem[Marino ()]{b12}\label{b12} 	 		‘Efficient high-speed/low-power pipelined architecture for the direct 2-D discrete wavelet transform’.  		 			F Marino 		.  	 	 		\textit{IEEE Trans. Circuits Systems}  		2000. II  (12)  p. .  	 
\bibitem[Chakrabarti and Vishwanath (1995)]{b4}\label{b4} 	 		‘Efficient realizations of the discrete and continuous wavelet transforms: from single chip implementations to SIMD parallel computers’.  		 			C Chakrabarti 		,  		 			M Vishwanath 		.  	 	 		\textit{IEEE Trans. Signal Processing}  		March 1995. 43  (3)  p. .  	 
\bibitem[Zervas et al. (2001)]{b7}\label{b7} 	 		‘Evaluation of design alternatives for the 2-D discrete wavelet transform’.  		 			N D Zervas 		,  		 			G P Anagnostopoulos 		,  		 			V Spiliotopoulos 		,  		 			Y Andreopoulos 		,  		 			C E Goutis 		.  	 	 		\textit{IEEE Trans. Circuits and Syst. Video Technol}  		December 2001. 11  (2)  p. .  	 
\bibitem[Daubechies and Sweldens ()]{b0}\label{b0} 	 		‘Factoring Wavelet transforms into Lifting Schemes’.  		 			I Daubechies 		,  		 			W Sweldens 		.  	 	 		\textit{The J. of Fourier Analysis and Applications}  		1998. 4 p. .  	 
\bibitem[Huang et al. ()]{b3}\label{b3} 	 		‘Flipping Structure: An Efficient VLSI Architecture for Lifting-Based Discrete Wavelet Transform’.  		 			C T Huang 		,  		 			P C Tseng 		,  		 			L G Chen 		.  	 	 		\textit{IEEE Transactions on Signal Processing},  				2004. p. .  	 
\bibitem[Park and Jung ()]{b13}\label{b13} 	 		‘High speed lattice based VLSI architecture of 2D discrete wavelet transform for real-time video signal processing’.  		 			T Park 		,  		 			S Jung 		.  	 	 		\textit{IEEE Trans. Consumer Elect}  		2002. 48  (4)  p. .  	 
\bibitem[Lian et al. ()]{b2}\label{b2} 	 		‘Lifting Based Discrete Wavelet Transform Architecture for JPEG 2000’.  		 			C Lian 		,  		 			K F Chen 		,  		 			H H Chen 		,  		 			L G Chen 		.  	 	 		\textit{IEEE International Symposium on Circuits and Systems},  				 (Sydney, Australia)  		2001. p. .  	 
\bibitem[Darji et al.]{b19}\label{b19} 	 		‘Memory Efficient and Low power VLSI architecture for 2-D Lifting based DWT with Dual data Scan Technique’.  		 			A D Darji 		,  		 			A N Chandorkar 		,  		 			S N Merchant 		.  	 	 		\textit{Recent Researches in Circuits, Systems and Signal Processing},  				 	 
\bibitem[Vaidyanathan ()]{b15}\label{b15} 	 		\textit{Multirate systems and Filter Banks},  		 			P P Vaidyanathan 		.  		1993. Englewood Cliffs: Prentice-Hall.  	 
\bibitem[Nagabushnam, Cyril Prasanna Raj P, Ramachandra, Design and FPGA Implementation of Modified Distributive Arithmetic Based DWT-IDWT Processor for Image Compression IEEE Trans. on circuit and systems for video Technology ()]{b10}\label{b10} 	 		‘Nagabushnam, Cyril Prasanna Raj P, Ramachandra, Design and FPGA Implementation of Modified Distributive Arithmetic Based DWT-IDWT Processor for Image Compression’.  	 	 		\textit{IEEE Trans. on circuit and systems for video Technology}  		December 2005 11. 2009. 15  (12)  p. .  	 	 (European Journal of Scientific Research) 
\bibitem[Weste and Harris ()]{b17}\label{b17} 	 		 			Neil H E Weste 		,  		 			David Harris 		.  		\textit{CMOS VLSI Design -A Circuit and System Perspective},  				2005. Pearson Education.  	 	 (3rd edition) 
\bibitem[Vishwanath et al. (1995)]{b6}\label{b6} 	 		‘VLSI architectures for the discrete wavelet transform’.  		 			M Vishwanath 		,  		 			R M Owens 		,  		 			M J Irwin 		.  	 	 		\textit{IEEE Trans. Circuits and Syst}  		May 1995. II  (5) .  	 
\end{bibitemlist}
 			 		 	 
\end{document}
