Monday, April 19, 2010

04/19 : Func Sample Code

 

/****************************************************************************
*
*
* Author : Dayang Sun
*
* File   : Program.cs
*
* Date   : 4/19/2010 5:00:49 PM
*
* 使用Func<T,TResult>和Action<T>,Action而不使用Delegate其实都是为了简化代码, 使用更少的代码达到相同的效果,不需要我们显示的声明一个委托,Func<T,TResult>的最后一个参数始终是返回类型,而 Action<T,TResult>是没有返回类型的,而Action是没有返回类型和参数输入的。

*
* ***************************************************************************/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ConsoleApplication1
{
    delegate int Searcher(string searchString, int start, int count,
                         StringComparison type);

    class Program
    {

        // Without using Func generic deletegate
        static void Main(string[] args)
        {
            //WithoutFunc();
            //WithFunc();
            //WithFuncAnonymous();
            //WithFuncLamada();
            WithFuncLing();
        }

        // Without using Func generic deletegate
        static void WithoutFunc()
        {
            string title = "The House of the Seven Gables";
            int position = 0;
            Searcher finder = title.IndexOf;
            do
            {
                int characters = title.Length - position;
                position = finder("the", position, characters,
                                StringComparison.InvariantCultureIgnoreCase);
                if (position >= 0)
                {
                    position++;
                   Console.WriteLine ( String.Format("'The' found at position {0} in {1}.",
                                      position, title) + "\n");
                }
            } while (position > 0);

        }

        // By this way, can not add more code except call indexof function
        static void WithFunc()
        {
            string title = "The House of the Seven Gables";
            int position = 0;
            Func<string, int, int, StringComparison, int> finder = title.IndexOf;
            do
            {
                int characters = title.Length - position;
                position = finder("the", position, characters,
                                StringComparison.InvariantCultureIgnoreCase);
                if (position >= 0)
                {
                    position++;
                    Console.WriteLine(String.Format("'The' found at position {0} in {1}.",
                                      position, title) + "\n");
                }
            } while (position > 0);

        }

        static void WithFuncAnonymous()
        {
            string title = "The House of the Seven Gables";
            int position = 0;
            Func<string, int, int, StringComparison, int> finder =
                 delegate(string s, int pos, int chars, StringComparison type)
                 {
                     Console.WriteLine("sdy");
                     return title.IndexOf(s, pos, chars, type); };
            do
            {
                int characters = title.Length - position;
                position = finder("the", position, characters,
                                StringComparison.InvariantCultureIgnoreCase);
                if (position >= 0)
                {
                    position++;
                    Console.WriteLine(String.Format("'The' found at position {0} in {1}.",
                                      position, title) + "\n");
                }
            } while (position > 0);

        }

        static void WithFuncLamada()
        {
            string title = "The House of the Seven Gables";
            int position = 0;
            Func<string, int, int, StringComparison, int> finder =
               (s, pos, chars, type) =>
               {
                   Console.WriteLine("WithFuncLamada");
                 return  title.IndexOf(s, pos, chars, type);
               };
            do
            {
                int characters = title.Length - position;
                position = finder("the", position, characters,
                                StringComparison.InvariantCultureIgnoreCase);
                if (position >= 0)
                {
                    position++;
                    Console.WriteLine(String.Format("'The' found at position {0} in {1}.",
                                      position, title) + "\n");
                }
            } while (position > 0);

        }

        static void WithFuncLing()
        {
            Func<String, int, bool> predicate = (str, index) => str.Length == index;

            String[] words = { "orange", "apple", "Article", "elephant", "star", "andty" };
            IEnumerable<String> aWords = words.Where(predicate).Select(str => str);

            foreach (String word in aWords)
               Console.WriteLine ( word + "\n");

        }
    }

}

04/19: AddFileCommentMacro In Visual Studio

 

 

Macro Code :

 

Option Strict Off
Option Explicit Off
Imports System
Imports EnvDTE
Imports EnvDTE80
Imports EnvDTE90
Imports EnvDTE90a
Imports EnvDTE100
Imports System.Diagnostics

Public Module RecordingModule

    Sub AddCommentToFile()
        DTE.ActiveDocument.Selection.Text = "/****************************************************************************"
        DTE.ActiveDocument.Selection.NewLine(3)
        DTE.ActiveDocument.Selection.Text = "Name : Dayang Sun"
        DTE.ActiveDocument.Selection.NewLine()
        DTE.ActiveDocument.Selection.LineUp()
        DTE.ActiveDocument.Selection.Delete(4)
        DTE.ActiveDocument.Selection.Text = "Author"
        DTE.ActiveDocument.Selection.LineDown()
        DTE.ActiveDocument.Selection.NewLine()
        DTE.ActiveDocument.Selection.Text = "File   : " + DTE.ActiveDocument.Name
        DTE.ActiveDocument.Selection.NewLine(2)
        DTE.ActiveDocument.Selection.Text = "Date:"
        DTE.ActiveDocument.Selection.CharLeft()
        DTE.ActiveDocument.Selection.Text = "   "
        DTE.ActiveDocument.Selection.CharRight(False, 2)
        DTE.ActiveDocument.Selection.CharLeft()
        DTE.ActiveDocument.Selection.Text = " " + DateTime.Now
        DTE.ActiveDocument.Selection.NewLine(3)
        DTE.ActiveDocument.Selection.Text = "***************************************************************************/"
    End Sub
End Module

04/19 : Fibonacci With yield In C#

 

yield syntax is a feature of FW 2.0.

  • It is used to return a item inside of a loop and keep state of calling method through multiple calls
  • can be used in function like power , Fibonacci which need the previous value to calculate current result
  • The function need to return IEnmerable<T>
  • yield return ; yield break
    • yield break will not return any value and stop
  • Calling function use foreach
  • Sample Code

……………………………….

public static IEnumerable<int> Fibonacci(int max)
       {
           if (max < 1)
           {
               yield break;
           }

           yield return 0;
           yield return 1;
           int counter = 0;
           int beforeprevious = 0;
           int previous = 1;
           int current = int.MinValue;
           while (counter++ < max)
           {
               current = beforeprevious + previous;
               yield return current;
               beforeprevious = previous;
               previous = current;
           }
       }

………………(calling funtion)

foreach (int jj in Fibonacci(10))
{
    Console.Write(jj.ToString() + "  "); 
}

Sunday, April 18, 2010

04/18:Visual Studio 2010中Parallel的使用(ZT)

 

Example

 

(1) Old way without parallel

在单核时代,CPU在同一时间只能做一件事情,所以他只能先烧水,后锻炼,这样显然会耽误时间。

​/​ ​串​行​式​起​床​ ​

  • p​r​i​v​a​t​e​ ​s​t​a​t​i​c​ ​v​o​i​d​ ​G​e​t​U​p​(​)​ ​
  • {​ ​
  • ​ ​ ​ ​S​t​a​r​t​(​"​G​e​t​U​p​"​)​;​ ​
  • ​ ​ ​ ​/​/​ ​先​烧​水​ ​
  • ​ ​ ​ ​b​o​i​l​(​)​;​ ​
  • ​ ​ ​ ​/​/​ ​后​锻​炼​ ​
  • ​ ​ ​ ​e​x​e​r​c​i​s​e​(​)​;​ ​
  • ​ ​ ​ ​E​n​d​(​"​G​e​t​U​p​"​)​;​ ​
  • }​ ​
  •  
  •  
  • /​/​ ​锻​炼​ ​
  • p​r​i​v​a​t​e​ ​s​t​a​t​i​c​ ​v​o​i​d​ ​e​x​e​r​c​i​s​e​(​)​ ​
  • {​ ​
  • ​ ​ ​ ​C​o​n​s​o​l​e​.​W​r​i​t​e​L​i​n​e​(​"​E​x​e​r​c​i​s​e​"​)​;​ ​
  • ​ ​ ​ ​T​h​r​e​a​d​.​S​l​e​e​p​(​2​0​0​0​)​;​ ​
  • ​ ​ ​ ​C​o​n​s​o​l​e​.​W​r​i​t​e​L​i​n​e​(​"​F​i​n​i​s​h​ ​E​x​e​r​c​i​s​e​"​)​;​ ​
  • }​ ​
  •  
  • /​/​ ​烧​水​ ​
  • p​r​i​v​a​t​e​ ​s​t​a​t​i​c​ ​v​o​i​d​ ​b​o​i​l​(​)​ ​
  • {​ ​
  • ​ ​ ​ ​C​o​n​s​o​l​e​.​W​r​i​t​e​L​i​n​e​(​"​B​o​i​l​"​)​;​ ​
  • ​ ​ ​ ​T​h​r​e​a​d​.​S​l​e​e​p​(​3​0​0​0​)​;​ ​
  • ​ ​ ​ ​C​o​n​s​o​l​e​.​W​r​i​t​e​L​i​n​e​(​"​F​i​n​i​s​h​ ​B​o​i​l​"​)​;​ ​
  • }​


  • (2).





    1. /​/​ ​并​行​式​起​床​ ​
    2. p​r​i​v​a​t​e​ ​s​t​a​t​i​c​ ​v​o​i​d​ ​P​a​r​a​l​l​e​l​G​e​t​U​p​(​)​ ​
    3. {​ ​
    4. ​ ​ ​ ​S​t​a​r​t​(​"​P​a​r​a​l​l​e​l​G​e​t​U​p​"​)​;​ ​
    5. ​ ​ ​ ​/​/​ ​在​烧​水​的​同​时​,​锻​炼​身​体​ ​
    6. ​ ​ ​ ​v​a​r​ s​t​e​p​s ​=​ n​e​w ​A​c​t​i​o​n​[​]​ ​{​ ​(​)​ ​=> ​b​o​i​l​(​)​,​ ​(​)​ ​=> ​e​x​e​r​c​i​s​e​(​)​ ​}​;​ ​
    7. ​ ​ ​ ​P​a​r​a​l​l​e​l​.​I​n​v​o​k​e​(​s​t​e​p​s​)​;​ ​
    8. ​ ​ ​ ​E​n​d​(​"​P​a​r​a​l​l​e​l​G​e​t​U​p​"​)​;​ ​
    9. }​

    image



    对Parallel.Invoke进行控制



    Parallel.Invoke提供了一个重载版本,它可以接受一个ParallelOptions对象作为参数,对 Parallel.Invoke的执行进行控制。通过这个对象,我们可以控制并行的最大线程数,各个任务是否取消执行等等。例如,在一个智能化的家中,系统会判断主人是否离开房间,如果主人离开了房间,则自动关闭屋子里的各种电器。利用Parallel.Invoke我们可以实现如下:



    see



    http://developer.51cto.com/art/201003/189896_1.htm



     



    并行任务之间的同步



    see



     



    http://developer.51cto.com/art/201003/189896_2.htm



     



    更复杂的任务之间的同步



    http://developer.51cto.com/art/201003/189896_3.htm



    我们在使用Barrier进行并行任务之间的同步时,有这样一个缺陷,我们需要预先知道所有需要同步的并行任务的数目,如果这个数目是随机的,就无法使用Barrier进行任务之间的同步了。并行任务数目不定这种情况很常见。我们还是来看上文中看电影的例子,每场进电影院看电影的观众数目是不固定的,那么退场的观众也是不固定的,甚至还有中途退场的。当所有观众都退场后,我们需要打扫电影院的卫生。这里需要的同步的就是所有观众都退场。针对这种数目不定的多个并行任务,.NET Framework提供了CountdownEvent这个类来进行任务之间的同步。



    就像它的名字一样,CountdownEvent基于这样一个简单的规则:当有新的需要同步的任务产生时,就调用AddCount增加它的计数,当有任务到达同步点是,就调用Signal函数减小它的计数,当CountdownEvent的计数为零时,就表示所有需要同步的任务已经完成,可以开始下一步任务了。下面我们利用CountdownEvent来模拟一下观众进场立场的情景。

    Wednesday, April 14, 2010

    04/14: 进程和线程的关系(ZT)

     

    网上有一副很经典的图可以让我们来理解进程和线程的关系:

    下面这副图是一个双向多车道的道路图,假如我们把整条道路看成是一个“进程”的话,那么图中由白色虚线分隔开来的各个车道就是进程中的各个“线程”了。

    image

    这副图出自:http://www.blogjava.net/pengpenglin/archive/2008/09/02/226292.html

    一些说明:

    • 这些线程(车道)共享了进程(道路)的公共资源(土地资源)。
    • 这些线程(车道)必须依赖于进程(道路),也就是说,线程不能脱离于进程而存在(就像离开了道路,车道也就没有意义了)。
    • 这些线程(车道)之间可以并发执行(各个车道你走你的,我走我的),也可以互相同步(某些车道在交通灯亮时禁止继续前行或转弯,必须等待其它车道的车辆通行完毕)。
    • 这些线程(车道)之间依靠代码逻辑(交通灯)来控制运行,一旦代码逻辑控制有误(死锁,多个线程同时竞争唯一资源),那么线程将陷入混乱,无序之中。
    • 这些线程(车道)之间谁先运行是未知的,只有在线程刚好被分配到CPU时间片(交通灯变化)的那一刻才能知道。

    线程的生存期

    当安排执行一个线程后,这个线程会经历几个状态,包括:未启动,激活,睡眠状态等,如下图显示的线程的生存期:

    image

    此图来自《c#线程参考手册》,这本书的电子版在CSDN有下载:http://d.download.csdn.net/down/1493065/phs415

    如果使用Java,看下面图更容易点,其实他们描述的意思完全一样:

    image

    JVM 与 进程,线程

    Java 编写的程序都运行在在Java虚拟机(JVM)中,每用java命令启动一个java应用程序,就会启动一个JVM进程。在同一个JVM进程中,有且只有一个进程,就是它自己。在这个JVM环境中,所有程序代码的运行都是以线程来运行的。JVM找到程序程序的入口点main(),然后运行main()方法,这样就产生了一个线程,这个线程称之为主线程。当main方法结束后,主线程运行完成。JVM进程也随即退出。

    如下图所示:

    image

    上述资料参看:

    JVM和JRE浅谈
    http://blog.csdn.net/liufeng_cp/archive/2008/07/18/2674317.aspx

    Java多线程编程总结
    http://lavasoft.blog.51cto.com/62575/27069

    Java 由于要考虑跨平台,所以它的进程,线程的模型比较简单, .Net 则复杂些了:

    .Net CLR、 进程, 应用程序域, 线程

    image

    说明:

    • 类似于Java虚拟机,目前CLR都是存在某个进程内,即:进程加载CLR。在 .Net 4.0 中,可以进程内并存 .Net 4.0 和之前版本, 就是这个的一个典型场景(.Net 4.0 的新特性你: "In-Proc SxS“ 就是指这个)。
    • .Net 中引入了应用程序域的概念,这是进程内的进一步的逻辑分区。显然:一个进程可以包含多个应用程序域,每一个应用程序域中承载一个.NET可执行程序。这样的好处:
      • 应用程序域是.NET平台操作系统独立性的关键特性。这种逻辑分区将不同操作系统加载可执行程序的差异抽象化了。
      • 和一个完整的进程相比,应用程序域的CPU和内存占用要小的多。因此CLR加载和卸载应用程序域比起完整的进程来说也快的多。
      • 应用程序域为承载的应用程序提供了深度隔离。如果进程中一个应用程序域失败了,其他的应用程序域也能保持正常。

    下面这幅图就描述了 应用程序域 所处的位置:

    image

    参看: CLR 全面透彻解析进程内并行

    Thursday, April 08, 2010

    0408:(ZT)Javascript与正则表达式个人总结与收录

    Javascript与正则表达式个人总结与收录--基础篇

    一、正则表达式的定义

    正则表达式是一种可以用于模式匹配和替换的强有力的工具。

    二、正则表达式的作用

      1、测试字符串的某个模式。例如,可以对一个输入字符串进行测试,看在该字符串是否存在一个模式,这也称为数据有效性验证。     
    2、替换文本。可以在文档中使用一个正则表达式来标识特定文字,然后将其删除,或者替换为别的内容。     
    3、根据模式匹配从字符串中提取一个子字符串。随后可以用来在文本或输入字段中查找特定文字。

    三、正则表达式的常见写法

    现在很多正则表达式都采用了perl风格的写法,即把正则表达式的内容放在/ /中间,看起美观,最主要的是实用,方便辨别。

    当然,如果不闲麻烦也可以写成如下的格式:

        var re = new RegExp (“regContent”);

    四、正则表达式的“元字符”

    所谓元字符就是指那些在正则表达式中具有特殊意义的专用字符,可以用来规定其前导字符(即位于元字符前面的字符)在目标对象中的出现模式。

    较为常用的元字符如下:

    “+”元字符规定其前导字符必须在目标对象中连续出现一次或多次。

    示例:/Ro+/ 因为上述正则表达式中包含“+”元字符,所以可以与类似“Ro”,“Rocky”,“Roof”等在字母R后面连续出现一个或多个字母o的字符串相匹配。

    “*”元字符规定其前导字符必须在目标对象中出现零次或多次。

    示例:/Ro*/ 因为上述正则表达式中包含“*”元字符,所以可以与类似 “Ricky”, “Rocky”或者 “Roof”等在字母R后面连续出现零个或多个字母o的字符串

                   匹配。   

    “?”元字符规定其前导对象必须在目标对象中连续出现零次或一次。 

    示例:/Ro?/ 因为上述正则表达式中包含“?”元字符,所以可以与目标对象中的 “Ricky”,“Rocky”这样在字母R后面连续出现零个或一个字母o的字符串匹配。

    五、正则表达式的限定符

          有时候不知道要匹配多少字符。为了能适应这种不确定性,需要用到正则表达式中的限定符。

    {n}:     n 是一个非负整数,表示匹配确定的 n 次。例如,o{2} 不能匹配 “Rocky” 中的 o,但是能匹配 “Roof” 中的两个 o。  

    {n,}:    n 是一个非负整数,表示至少匹配 n 次。例如,o{2,} 不能匹配 “Rocky” 中的 o,但能匹配 “Roof”或“Whoooooa” 中的o。

    {n,m}:   m 和 n 均为非负整数,其中n <= m,表示最少匹配 n 次且最多匹配 m 次。例如,"o{1,3}" 将匹配 “Whoooooa” 中的前三个 o

    六、正则表达式中的定位符

    正则表达式中的定位符用于规定匹配模式在目标对象中的出现位置。

    “^”—— 定位符规定匹配模式必须出现在目标字符串的开头  

    “$”—— 定位符规定匹配模式必须出现在目标对象的结尾  

    “\b”—— 定位符规定匹配模式必须出现在目标字符串的开头或结尾的两个边界之一  

    “\B”——定位符则规定匹配对象必须位于目标字符串的开头和结尾两个边界之内, 即匹配对象既不能作为目标字符串的开头,也不能作为目标字符串的结尾。

    示例:

    /^Ro/ 可以与类似 “Rocky”,“Rock”或“Roof”的字符串相匹配。

    /ball$/ 可以与类似“football”,“basketball”或 “baseball”的字符串相匹配。

    /\bon/ 可以与类似 “one”,“once”的字符串相匹配。

    /on\b/   可以与类似“sensation”,“generation”的字符串相匹配

    /on\B/ 可以与类似“song”,“tone”,“tongue”的字符串相匹配()

                  注:/\Bon此处匹配作用相同/

    七、正则表达式中的其他常见符号

    “()”:    在正则表达式中使用括号把字符串组合在一起。“()”符号包含的内容必须同时出现在目标对象中才可匹配,当然有时在功能没有影响的情况下,会为了方便代码检查等加上括号运算符。

    “|”:     在正则表达式中实现类似编程逻辑中的“或”运算。例如:/Rocky|Ricky/ 上述正则表达式将会与目标对象中的 “Rocky”或“Ricky”。

    “[]”:    指定可以取的字符范围,如 [0-9] 可以匹配从0到9范围内的任何一个数字。

    “[^]”: 规定目标对象中不能存在模式中所规定的字符串,即否定符。例如:/[^A-C]/ 上述字符串将会与目标对象中除A,B,和C之外的任何字符相匹配。

        例如:

            alert(/[^A-C]/.test('A'));   //false

            alert(/[^A-C]/.test('AB'));  //false

            alert(/[^A-C]/.test('AD'));  //true

            alert(/[^A-C]/.test('DF'));  //true

    “\”:   转义符。当用户需要在正则表达式的模式中加入元字符,并查找其匹配对象时,可以使用转义符。例如:/R\*/会与“R*”匹配而非“Ro”或“Ri”等相匹配。

    八、常用正则表达式匹配

    \s:   用于匹配单个空格符,包括tab键和换行符;

    \S:   用于匹配除单个空格符之外的所有字符;

    \d:   用于匹配从0到9的数字;

    \w:   用于匹配字母,数字或下划线字符;

    \W:   用于匹配所有与\w不匹配的字符;

    : 用于匹配除换行符之外的所有字符。

    九、正则表达式常见的运算符等价转化关系

    o{1,}     <=>   o+

    o{0,}     <=>  o*

    o{0,1}   <=>   o?

        \d      <=>   [0-9]

        \w     <=>    [a-zA-Z0-9_]

    十、正则表达式运算符优先级

    1.\                      转义符  

    2.(),(?:),(?=),[]          圆括号和方括号  

    3.*,+,?,{n},{n,},{n,m}   限定符  

    4.^,$,\anymetacharacter   位置和顺序  

    5.|                     “或”操作 

     

    一、正则表达式中的量词

    贪婪量词:

          先看整个字符串是不是一个匹配。如果没有发现匹配,它去掉最后字符串中的最后一个字符,并再次尝试。如果还是没有发现匹配,那么再次去掉最后一个字符串,这个过程会一直重复直到发现一个匹配或者字符串不剩任何字符。简单量词都是贪婪量词。

    惰性量词:

          先看字符串中的第一个字母是不是一个匹配,如果单独着一个字符还不够,就读入下一个字符,组成两个字符的字符串。如果还没有发现匹配,惰性量词继续从字符串中添加字符直到发现一个匹配或者整个字符串都检查过也没有匹配。惰性量词和贪婪量词的工作方式恰好相反。

    支配量词:

          只尝试匹配整个字符串。如果整个字符串不能产生匹配,不做进一步尝试。

    image

     

    ?:当该字符紧跟在任何一个其他限制符 (*, +, ?, {n}, {n,}, {n,m}) 后面时,匹配模式是非贪婪的。非贪婪模式尽可能少的匹配所搜索的字符串,而默认的贪婪模式则尽可能多的匹配所搜索的字符串。例如,对于字符串 "oooo",'o+?' 将匹配单个 "o",而 'o+' 将匹配所有 'o'。

    二、 正则表达式高级使用

    (?:pattern) :匹配 pattern 但不获取匹配结果,也就是说这是一个非捕获型匹配,不进行存储供以后使用。这在使用 "|" 字符来组合一个模式的各个部分是很有

                       用。例如要匹配 Rocky 和Ricky,那么R(?:o|i)cky较之 Rocky |Ricky表达更为简略。

    (?=pattern) :正向预查,在任何匹配 pattern 的字符串开始处匹配查找字符串,匹配pattern前面的位置。这是一个非捕获型匹配,也就是说,该匹配不需要获取                      供以后使用。

    示例:

    alert(/Windows (?=95|98|NT|2000)/.test('Windows 2000'));   //true

    alert(/Windows(?=95|98|NT|2000)/.test('Windows 2000'));   //false

                          alert(/Windows (?=95|98|NT|2000)/.test('Windows 3.1'));    //false

                       预查不消耗字符,也就是说,在一个匹配发生后,在最后一次匹配之后立即开始下一次匹配的搜索,而不是从包含预查的字符之后开始。   

    (?!pattern) :负向预查,在任何不匹配pattern的字符串开始处匹配查找字符串, 匹配后面不是pattern的位置,这也是一个非捕获型匹配。

    示例:

                               alert(/Windows (?!95|98|NT|2000)/.test('Windows 2000'));   // false

                               alert(/Windows(?!95|98|NT|2000)/.test('Windows 2000'));   // true

                               alert(/Windows (?!95|98|NT|2000)/.test('Windows 3.1'));    // true

    \数字:          反向引用,通过数字编号引用前面的内容

    示例:

                               alert(/ba(na)\1/.test('banana'));  //true

                               alert(/(\d{4})\1(\d{2})\1/.test('12341234121234'));   //true

                               alert(/(\d{4})\1(\d{2})\2/.test('123412341212'));      //true

    三、正则表达式中参数的使用

    1、g:表明可以进行全局匹配。

    ①   对于表达式对象的exec方法,不加入g,则只返回第一个匹配,无论执行多少次均是如此,如果加入g,则第一次执行也返回第一个匹配,再执行返回第二个匹配,依次类推。

          例如:
                 var regx=/user\d/;
                 var str=“user18dsdfuser2dsfsd”;
                 var rs=regx.exec(str);//此时rs的值为{user1}
                 var rs2=regx.exec(str);//此时rs的值依然为{user1}
                 如果regx=/user\d/g;则rs的值为{user1},rs2的值为{user2}

          通过这个例子说明:对于exec方法,表达式加入了g,并不是说执行exec方法就可以返回所有的匹配,而是说加入了g之后,我可以通过某种方式得到所有的匹配,这里的方式对于exec而言,就是依次执行这个方法即可。
    ②   对于match方法,不加入g,只是返回第一个匹配,而且一直执行match方法也总是返回第一个匹配,但加入g,则一次返回所有的匹配(注意是“所有”)。

          例如:
                 var regx=/user\d/;
                 var str=“user1sdfsffuser2dfsdf”;
                 var rs=str.match(regx);//此时rs的值为{user1}
                 var rs2=str.match(regx);//此时rs的值依然为{user1}
                 如果regx=/user\d/g,则rs的值为{user1,user2},rs2的值也为{user1,user2}
    ③  对于String对象的replace方法,表达式不加入g,则只替换第一个匹配,如果加入g,则替换所有匹配。
    ④  对于表达式对象的test方法,加g与不加g没有什么区别。
    ⑤  对于String对象的split方法,加g与不加g是一样的。

    ⑥  对于String对象的search方法,加不加g也是一样的。

    2、i:忽略大小写匹配

    示例:

            alert(/[A-Z]/.test('a'));  //false

            alert(/[A-Z]/i.test('a'));  //true

    3、m:多行模式

    示例:

               var toMatch = "First second\nthird fourth\nfifth sixth";

               var reg = /(\w+)$/g;

               alert(toMatch.match(reg));  // sixth

        假如将\n看作是换行符,此时也想匹配换行前的单词,即second,fourth,则可以使用多行模式:

               var toMatch = "First second\nthird fourth\nfifth sixth";

               var regm = /(\w+)$/gm;

               alert(toMatch.match(regm));   // second,fourth,sixth,

    四、正则表达式中test,exec,match,search方法比较

    test():  返回bool值,表示是否匹配成功。

    exec(): 返回第一个匹配的字符串

    match():返回所有匹配的字符串(数组)

    search():返回在字符串中出现的第一个匹配的位置

    示例:

           var sToMatch = “a bat, a Cat, a fAt baT, a faT cat”;

           var re = /at/;

           re.test(sToMatch);    //true

           re.exec(sToMatch);   //返回第一个at的实例(即bat中的那个)

           re.match(sToMatch);  //返回at,at,At,aT,aT,at组成的数组

           re.search(sToMatch);  //3

    五、备注

    1、  使用限定符时,如o{0,1},注意在逗号和两个数之间不能有空格

    2、  一般来说,当“^”出现在 “[]”内时就被视做否定运算符;而当“^”位于“[]”之外,或没有“[]”时,则应当被视做定位符。  

     

    光说不练假把式,下面通过对字符串的常见操作实现对相关理论的简单回顾。

    声明字符串处理对象:

    var stringUtil = new Object();



    1、将字符串首字母大写



    方法一、首字符匹配英文字母,则用其大写形式替换



    1 function capitalize(str){
    2 return str.replace(/^\w/, str.substr(0, 1).toUpperCase());
    3 }



    方法二、



    1 stringUtil.capitalize = function(str) {
    2 return str.replace(/^\w/, function(s) {
    3 return s.toUpperCase();
    4 });
    5 }



    2、计算字符串长度(汉字算两个字节)



    1 stringUtil.lenReg = function(str) {
    2 return str.replace(/[^\x00-\xff]/g, " ").length;
    3 }



    3、去除字符串中的空白



    代码



    1 stringUtil.lTrim = function(str) {
    2 /// <summary>去掉字符串左边的空格</summary>
    3   return str.replace(/(^\s*)/g, "");
    4 }
    5
    6 stringUtil.rTrim = function(str) {
    7 /// <summary>去掉字符串右边的空格</summary>
    8   return str.replace(/(\s*$)/g, "");
    9 }
    10
    11 stringUtil.trim = function(str) {
    12 /// <summary>去除字符串开头结尾的空白</summary>
    13   return str.replace(/(^\s*)|(\s*$)/g, "");
    14 }
    15 



    4、去除字符串中的相同字符



    1 stringUtil.trimSameChar = function(str) {
    2 var re = /(.)(.*)\1/g;
    3 while (str.match(re) != null) {
    4 str = str.replace(re, "$1$2");
    5 }
    6 return str;
    7 }



    5、去除字符串中的HTML标签



    1 stringUtil.trimHTML = function() {
    2 //(?:.|\s)表示匹配<后面的任何文本,然后跟着一个>,*?为惰性量词,表示零次或多次出现
    3   var reTag = /<(?:.|\s)*?>/g;
    4 return this.replace(reTag, "");
    5 };



    对比:通过普通方式和正则表达式取得URL中的参数值



    普通循环遍历处理:



    代码



    1 function getUrlParamValue(fieldName) {
    2 /// <summary>取得网址中的参数,相当于.net的Request.QueryString方法,
             如果没有取到值,返回null</summary>
    3   /// <param name="fieldName">查询的参数名</param>
    4   var urlString = location.search;//假如是:default.aspx?id=1&page=1
    5 if (urlString.length > 0) {
    6 var paramsArr = urlString.split('?')[1].split('&');//{id=1} {page=1}
    7 for (var i = 0; i < paramsArr.length; i++) {
    8 var paramValue = paramsArr[i].split('=');
    9 //如果与所需参数相同,则返回其参数值
    10 if (paramValue[0].toUpperCase() == fieldName.toUpperCase()) {
    11 return paramValue[1];
    12 break;
    13 }
    14 }
    15 return;
    16 }
    17 else {
    18 return null;
    19 }
    20 }



    正则表达式处理:



    分析:URL的参数可能有多个,所以需要的参数值有可能出现在?后面,也有可能出现在&后面,故此时需要同



    时匹配二者,而此参数的后面还可能有零个或多个参数,所以要通过[^\&]*进行匹配,最后为了忽略大小写可以在



    构造正则表达式的时加入i 参数。



    代码



    1 function getUrlParamValueByReg(fieldName) {
    2 var sValue = location.search.match(new RegExp("[\?|\&]" + fieldName
    + "=([^&]*)", "i"));
    3 return sValue ? sValue.toString().split(",")[1] : null;
    4 }
    5



    通过比较,不难看出正则表达式的强大啊。。。





    总结了一些常见的表单验证类的操作,供以后备用。。。



    var validateUtil = new Object();

    validateUtil.isEmpty = function(id) {
    /// <summary>检测元素</summary>
    /// <param name="id">元素id</param>
    var str = stringUtil.trim($(id).value);
    if (str.length == 0) {
    return true;
    }
    return false;
    }

    validateUtil.isContainChinese = function(str) {
    /// <summary>检测字符串中是否有汉字</summary>
    var reg = /[\u4e00-\u9fa5]/;
    return reg.test(str);
    }

    validateUtil.isValidUserName = function(strUserName) {
    /// <summary>检测用户名,长度在6到15位之间,且只能由中英文、数字、-、_构成,
    ///中文算两个字符</summary>
    if (stringUtil.len(strUserName) >= 6 && stringUtil.len(strUserName) <= 15) {
    var reg = /([\u4e00-\u9fa5]|[\w-])/g;
    return reg.test(strUserName);
    }
    return false;
    }

    validateUtil.isValidPassword = function(strPass) {
    /// <summary>检测密码,密码长度在6到15位之间,且只能由英文字母、数字、-、_构成,
    ///且首位必须是英文字母</summary>
    var reg = /^[a-zA-Z][\w-]{5,15}$/;
    return reg.test(strPass);
    }

    validateUtil.isValidEmail = function(strEmail) {
    /// <summary>检测Email地址是否合法</summary>
    var reg = /\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*/;
    return reg.test(strEmail);
    }

    validateUtil.isValidDate = function(strDate, separator) {
    /// <summary>检测日期是否合法</summary>
    /// <param name="separator">可选项:年月日之间的分隔符,只可以为'-'或'/',
    默认为'-'</param>
    if (strDate.length == 0) {
    return false;
    }
    //根据正则表达式判断基本的日期格式是否正确:(1|2)xxx-xx-xx
    if (separator == '/') {
    var reg = /^(1|2)([\d]){3}\/[\d]{2}\/[\d]{2}/;
    if (!reg.test(strDate)) {
    return false;
    }
    }
    else {
    var reg = /^(1|2)([\d]){3}-[\d]{2}-[\d]{2}/;
    if (!reg.test(strDate)) {
    return false;
    }
    }
    //去除日期中的 '-' 或 '/' 分隔符
    var strFormatDate = strDate.replace(/(-|\/)/g, '');
    var year, month, day;
    //取得年月份的具体值
    year = isIE ?
    strFormatDate.substr(0, 4) : strFormatDate.substr(0, 4) + 1900;
    month = strFormatDate.substr(4, 2);
    day = strFormatDate.substr(6, 2);
    //检测年月日的合法性
    return checkDate(year, month, day);
    }

    validateUtil.isValidCid = function(strId) {
    /// <summary>检测身份证号码是否合法,若为15位则验证成功后返回转换为18位的身份证号,
    ///若为18位则验证成功后则返回true</summary>
    strId = strId.toUpperCase();
    //判断基本的身份证号码格式是否正确:15位时全为数字,18位前17位为数字,
    最后一位是校验位,可能为数字或字符X。
    if (!(/(^\d{15}$)|(^\d{17}([0-9]|X)$)/.test(strId))) {
    return false;
    }
    //校验位按照ISO 7064:1983.MOD 11-2的规定生成,X可以认为是数字10
    //下面分别分析出生日期和校验位
    var strIdLength = strId.length;
    var reg;
    var year, month, day;
    if (strIdLength == 15) {
    reg = new RegExp(/^(\d{6})(\d{2})(\d{2})(\d{2})(\d{3})$/);
    //如"123456789123456",则arrSplit为123456789123456,123456,78,91,23,456
    var arrSplit = strId.match(reg);

    //检查生日日期是否正确
    var year = '19' + arrSplit[2];
    var month = arrSplit[3];
    var day = arrSplit[4];
    if (!checkDate(year, month, day)) {
    //输入的身份证号里出生日期不对
    return false;
    }
    else {
    //将15位身份证转成18位
    return strId.substr(0, 6) + '19' + strId.substr(6, 9) + 'X';
    }
    }
    if (strIdLength == 18) {
    reg = new RegExp(/^(\d{6})(\d{4})(\d{2})(\d{2})(\d{3})([0-9]|X)$/);
    //如"12345678912345678X",则arrSplit为12345678912345678X,123456,7891,23,
    45,678,X
    var arrSplit = strId.match(reg);

    //检查生日日期是否正确
    var year = '19' + arrSplit[2];
    var month = arrSplit[3];
    var day = arrSplit[4];
    if (!checkDate(year, month, day)) {
    //输入的身份证号里出生日期不对
    return false;
    }
    else {
    //检验18位身份证的校验码是否正确。
    //校验位按照ISO 7064:1983.MOD 11-2的规定生成,X可以认为是数字10。
    var validStrId;
    var arrInt = new Array(7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5,
    8, 4, 2);
    var arrCh = new Array('1', '0', 'X', '9', '8', '7', '6', '5', '4',
    '3', '2');
    var nTemp = 0, i;
    for (i = 0; i < 17; i++) {
    nTemp += strId[i] * arrInt[i];
    }
    validStrId = arrCh[nTemp % 11];
    if (validStrId != strId.substr(17, 1)) {
    //18位身份证的校验码不正确,最后以为应该是+ validStrId);
    return false;
    }
    }
    }
    return true;
    }

    function checkDate(year, month, day) {
    //月份不可大于12,天数不可大于31
    if (parseInt(month, 10) <= 0 || parseInt(month, 10) > 12 ||
    parseInt(day, 10) <= 0 || parseInt(day, 10) > 31) {
    return false;
    }
    //判断是否是闰年
    if ((year % 4 == 0 && year % 100 != 0) || year % 400 == 0) {
    //闰年2月不可大于29天
    if (parseInt(month, 10) == 2) {
    if (parseInt(day, 10) > 29) {
    return false;
    }
    }
    }
    else {
    //平年2月不可大于28天
    if (parseInt(month, 10) == 2) {
    if (parseInt(day, 10) > 28) {
    return false;
    }
    }
    }
    var monthArr = new Array(1, 3, 5, 7, 8, 10, 12);
    for (var i = 0; i < monthArr.length; i++) {
    //“大月”天数不可大于31
    if (monthArr[i] == parseInt(month, 10)) {
    if (parseInt(day, 10) > 31) {
    return false;
    }
    }
    else {
    //“小月”天数不可大于30
    if (parseInt(day, 10) > 30) {
    return false;
    }
    }
    }
    return true;
    }

    Tuesday, April 06, 2010

    04/06: SP VMware Environment Installation Steps

    • Install Window Server 2003 under VMware Workstation
    • Config Window Components in Window Server 2003(add/remove window components of control panel)
      • Uncheck IE security
      • Enable DNS
      • IIS
      • ASP.net
      • SMTP
    • Config Active Directory
    • Install SQL Server 2005
    • Install Office 2007
    • Install VS2008

    Saturday, April 03, 2010

    04/03: Interview Question :Get Maximum Sum of subarray

    给定一个长度为n的一维数组a,请找出此数组的一个子数组,使得此子数组的和sum=a[i]+a[i+1]+……+a[j]最大,其中i>=0,i<n,j>=i,j<n,例如
       31 -41 59 26 -53  58 97 -93 -23 84
    子矩阵59+26-53+58+97=187为所求的最大子数组。

     

     

    1. -直接穷举法:(O(n3)

    public int GetSubArrayMaximum()
          {
              int maxresult=0;
              int[] a = new int[] { -8, 6, 3, -2, 4, -10 };
              for (int i = 0; i < a.Length; i++)
              {
                  for(int j=i;j<a.Length;j++)
                  {
                      int sum=0;
                      for(int k=i;k<=j;k++)
                      {
                          sum+=a[k];
                          if(sum>maxresult)
                          {
                              maxresult=sum;
                          }

                      }

                  }
              }

          return maxresult;

          }

     

    }

    2.(O(n)

    当我们加上一个正数时,和会增加;当我们加上一个负数时,和会减少。如果当前得到的和是个负数,那么这个和在接下来的累加中应该抛弃并重新清零,不然的话这个负数将会减少接下来的和

    int[] input = new int[] { 3, 73, -95, 42, 43, 29, -30 ,1};
                   //, -87, 74, -53, 22, 74, -91, -1, -27, -8, -14, 26, -67, -74 };
               //fst is first element of return array
               //snd  is end element of return array
               //maxsum is the sum of return array
               int fst = -1, snd = -1, tFst = 0, tSnd = 0, maxSum = 0, currSum = 0;
               for (; tSnd < input.Length; ++tSnd)
               {
                   currSum += input[tSnd];
                   if (currSum > maxSum)
                   {
                       maxSum = currSum;
                       fst = tFst;
                       snd = tSnd;
                   }
                   if (currSum < 0)
                   {
                       currSum = 0;
                       tFst = tSnd + 1;
                   }
               }

     

      // if all data are negative, find the greatest element in the array

    if(maxSum<0)

    {

    // find the greatest element of array

    }

    Friday, April 02, 2010

    04/02: Double-Checking Locking in .Net

     

    The purpose of this mechanism is to improve performance  in Multithread environment.

    • Defer constructing a singleton object until an application requests it (sometimes called lazy initialization).If the application never requests the object, it never gets constructed, saving time and memory
    • If two threads request this singleton object at the same time . Need to use some thread synchronization to make sure thread safe

     

    internal sealed class Singleton {
    // s_lock is required for thread safety and having this object assumes that creating
    // the singleton object is more expensive than creating a System.Object object and that
    // creating the singleton object may not be necessary at all. Otherwise, it is more
    // efficient and easier to just create the singleton object in a class constructor
    private static readonly Object s_lock = new Object();
    // This field will refer to the one Singleton object
    private static Singleton s_value = null;
    // Private constructor prevents any code outside this class from creating an instance
    private Singleton() {
    // Code to initialize the one Singleton object goes here...
    }
    // Public, static method that returns the Singleton object (creating it if necessary)
    public static Singleton GetSingleton() {
    // If the Singleton was already created, just return it (this is fast)
    if (s_value != null) return s_value;
    lock(s_lock)

    {

    if (s_value == null) {
    // Still not created, create it
    Singleton temp = new Singleton();
    // Save the reference in s_value (see discussion for

    details)

    }

    }

    }
    // Return a reference to the one Singleton object
    return s_value;
    }
    }

     

     

    According to expert, this way is not perfect

     

    In my opinion, developers think it is cool, and they use it far more often than they should. In most scenarios, this technique actually hurts efficiency. Here is a much simpler version of the Singleton class that behaves the same as the previous version. This version does not use the double-check locking technique:

    internal sealed class Singleton {
    private static Singleton s_value = new Singleton();
    // Private constructor prevents any code outside this class from creating an instance
    private Singleton() {
    // Code to initialize the one Singleton object goes here...
    }
    // Public, static method that returns the Singleton object (creating it if necessary)
    public static Singleton GetSingleton() { return s_value; }
    }

    Thursday, April 01, 2010

    04/01: SQL Index Basic(ZT)

     

    Relational databases like SQL Server use indexes to find data quickly when a query is processed. Creating and removing indexes from a database schema will rarely result in changes to an application's code; indexes operate 'behind the scenes' in support of the database engine. However, creating the proper index can drastically increase the performance of an application.

    The SQL Server engine uses an index in much the same way a reader uses a book index. For example, one way to find all references to INSERT statements in a SQL book would be to begin on page one and scan each page of the book. We could mark each time we find the word INSERT until we reach the end of the book. This approach is pretty time consuming and laborious. Alternately, we can also use the index in the back of the book to find a page number for each occurrence of the INSERT statements. This approach produces the same results as above, but with tremendous savings in time.

    When a SQL Server has no index to use for searching, the result is similar to the reader who looks at every page in a book to find a word: the SQL engine needs to visit every row in a table. In database terminology we call this behavior a table scan, or just scan.

    A table scan is not always a problem, and is sometimes unavoidable. However, as a table grows to thousands of rows and then millions of rows and beyond, scans become correspondingly slower and more expensive.

    Consider the following query on the Products table of the Northwind database. This query retrieves products in a specific price range.

    SELECT ProductID, ProductName, UnitPrice
    FROM Products WHERE (UnitPrice > 12.5) AND (UnitPrice < 14)


    There is currently no index on the Product table to help this query, so the database engine performs a scan and examines each record to see if UnitPrice falls between 12.5 and 14. In the diagram below, the database search touches a total of 77 records to find just three matches.





    Now imagine if we created an index, just like a book index, on the data in the UnitPrice column. Each index entry would contain a copy of the UnitPrice value for a row, and a reference (just like a page number) to the row where the value originated. SQL will sort these index entries into ascending order. The index will allow the database to quickly narrow in on the three rows to satisfy the query, and avoid scanning every row in the table.



    Create An Index



    Having a data connection in the Server Explorer view of Visual Studio.NET allows us to easily create new indexes:




    • Navigate to the Products table of the Northwind database.


    • Right click the table and select Design Table from the context menu.


    • With the design screen in focus, click the Indexes/Keys item on the View menu of the IDE.



    This should bring you to the following tabbed dialog box.



    The dialog is currently displaying an existing index on the Products table: the PK_Products index. We will see later in this chapter how primary key fields are automatically indexed to enforce uniqueness in the key values.




    • In the above dialog click on the New button, and in the Index name text box, replace the existing entry with IDX_UnitPrice.


    • Beneath the text box is a control where we set the columns to index. Pull down the entry with ProductID and select the UnitPrice column instead.


    • Leave all of the other options with default settings.


    • Close the dialog and the table design view, making sure to save all of the changes when prompted to do so. The IDE will then issue the commands to create the new index.



    We can create the same index using the following SQL. The command specifies the name of the index (IDX_UnitPrice), the table name (Products), and the column to index (UnitPrice).



     CREATE INDEX [IDX_UnitPrice] ON Products (UnitPrice)


    To verify that the index is created, use the following stored procedure to see a list of all indexes on the Products table:



     EXEC sp_helpindex Customers 


    How It Works


    The database takes the columns specified in a CREATE INDEX command and sorts the values into a special data structure known as a B-tree. A B-tree structure supports fast searches with a minimum amount of disk reads, allowing the database engine to quickly find the starting and stopping points for the query we are using.



    Conceptually, we may think of an index as shown in the diagram below. On the left, each index entry contains the index key (UnitPrice). Each entry also includes a reference (which points) to the table rows which share that particular value and from which we can retrieve the required information.





    Much like the index in the back of a book helps us to find keywords quickly, so the database is able to quickly narrow the number of records it must examine to a minimum by using the sorted list of UnitPrice values stored in the index. We have avoided a table scan to fetch the query results. Given this sketch of how indexes work, lets examine some of the scenarios where indexes offer a benefit.



    Taking Advantage of Indexes



    The database engine can use indexes to boost performance in a number of different queries. Sometimes these performance improvements are dramatic. An important feature of SQL Server 2000 is a component known as the query optimizer. The query optimizer's job is to find the fastest and least resource intensive means of executing incoming queries. An important part of this job is selecting the best index or indexes to perform the task. In the following sections we will examine the types of queries with the best chance of benefiting from an index.



    Searching For Records


    The most obvious use for an index is in finding a record or set of records matching a WHERE clause. Indexes can aid queries looking for values inside of a range (as we demonstrated earlier), as well as queries looking for a specific value. By way of example, the following queries can all benefit from an index on UnitPrice:



    DELETE FROM Products WHERE UnitPrice = 1 

    UPDATE Products SET Discontinued = 1 WHERE UnitPrice > 15

    SELECT * FROM PRODUCTS WHERE UnitPrice BETWEEN 14 AND 16


    Indexes work just as well when searching for a record in DELETE and UPDATE commands as they do for SELECT statements.



    Sorting Records


    When we ask for a sorted dataset, the database will try to find an index and avoid sorting the results during execution of the query. We control sorting of a dataset by specifying a field, or fields, in an ORDER BY clause, with the sort order as ASC (ascending) or DESC (descending). For example, the following query returns all products sorted by price:



     SELECT * FROM Products ORDER BY UnitPrice ASC 


    With no index, the database will scan the Products table and sort the rows to process the query. However, the index we created on UnitPrice (IDX_UnitPrice) earlier provides the database with a presorted list of prices. The database can simply scan the index from the first entry to the last entry and retrieve the rows in sorted order.



    The same index works equally well with the following query, simply by scanning the index in reverse.



    SELECT * FROM Products ORDER BY UnitPrice DESC 


    Grouping Records


    We can use a GROUP BY clause to group records and aggregate values, for example, counting the number of orders placed by a customer. To process a query with a GROUP BY clause, the database will often sort the results on the columns included in the GROUP BY. The following query counts the number of products at each price by grouping together records with the same UnitPrice value.



    SELECT Count(*), UnitPrice FROM Products GROUP BY UnitPrice 


    The database can use the IDX_UnitPrice index to retrieve the prices in order. Since matching prices appear in consecutive index entries, the database is able count the number of products at each price quickly. Indexing a field used in a GROUP BY clause can often speed up a query.



    Maintaining a Unique Column


    Columns requiring unique values (such as primary key columns) must have a unique index applied. There are several methods available to create a unique index. Marking a column as a primary key will automatically create a unique index on the column. We can also create a unique index by checking the Create UNIQUE checkbox in the dialog shown earlier. The screen shot of the dialog displayed the index used to enforce the primary key of the Products table. In this case, the Create UNIQUE checkbox is disabled, since an index to enforce a primary key must be a unique index. However, creating new indexes not used to enforce primary keys will allow us to select the Create UNIQUE checkbox. We can also create a unique index using SQL with the following command:



    CREATE UNIQUE INDEX IDX_ProductName On Products (ProductName)


    The above SQL command will not allow any duplicate values in the ProductName column, and an index is the best tool for the database to use to enforce this rule. Each time an application adds or modifies a row in the table, the database needs to search all existing records to ensure none of values in the new data duplicate existing values. Indexes, as we should know by now, will improve this search time.



    Index Drawbacks



    There are tradeoffs to almost any feature in computer programming, and indexes are no exception. While indexes provide a substantial performance benefit to searches, there is also a downside to indexing. Let's talk about some of those drawbacks now.



    Indexes and Disk Space

    Indexes are stored on the disk, and the amount of space required will depend on the size of the table, and the number and types of columns used in the index. Disk space is generally cheap enough to trade for application performance, particularly when a database serves a large number of users. To see the space required for a table, use the sp_spaceused system stored procedure in a query window.

     EXEC sp_spaceused Orders 


    Given a table name (Orders), the procedure will return the amount of space used by the data and all indexes associated with the table, like so:



    Name    rows     reserved    data    index_size  unused       
    ------- -------- ----------- ------ ---------- -------
    Orders 830 504 KB 160 KB 320 KB 24 KB


    According to the output above, the table data uses 160 kilobytes, while the table indexes use twice as much, or 320 kilobytes. The ratio of index size to table size can vary greatly, depending on the columns, data types, and number of indexes on a table.



    Indexes and Data Modification


    Another downside to using an index is the performance implication on data modification statements. Any time a query modifies the data in a table (INSERT, UPDATE, or DELETE), the database needs to update all of the indexes where data has changed. As we discussed earlier, indexing can help the database during data modification statements by allowing the database to quickly locate the records to modify, however, we now caveat the discussion with the understanding that providing too many indexes to update can actually hurt the performance of data modifications. This leads to a delicate balancing act when tuning the database for performance.



    In decision support systems and data warehouses, where information is stored for reporting purposes, data remains relatively static and report generating queries outnumber data modification queries. In these types of environments, heavy indexing is commonplace in order to optimize the reports generated. In contrast, a database used for transaction processing will see many records added and updated. These types of databases will use fewer indexes to allow for higher throughput on inserts and updates.



    Every application is unique, and finding the best indexes to use for a specific application usually requires some help from the optimization tools offered by many database vendors. SQL Server 2000 and Access include the Profiler and Index Tuning Wizard tools to help tweak performance.



    Now we have enough information to understand why indexes are useful and where indexes are best applied. It is time now to look at the different options available when creating an index and then address some common rules of thumb to use when planning the indexes for your database.



    Clustered Indexes



    Earlier in the article we made an analogy between a database index and the index of a book. A book index stores words in order with a reference to the page numbers where the word is located. This type of index for a database is a nonclustered index; only the index key and a reference are stored. In contrast, a common analogy for a clustered index is a phone book. A phone book still sorts entries into alphabetical order. The difference is, once we find a name in a phone book, we have immediate access to the rest of the data for the name, such as the phone number and address.



    For a clustered index, the database will sort the table's records according to the column (or columns) specified by the index. A clustered index contains all of the data for a table in the index, sorted by the index key, just like a phone book is sorted by name and contains all of the information for the person inline. The nonclustered indexes created earlier in the chapter contain only the index key and a reference to find the data, which is more like a book index. You can only create one clustered index on each table.



    In the diagram below we have a search using a clustered index on the UnitPrice column of the Products table. Compare this diagram to the previous diagram with a regular index on UnitPrice. Although we are only showing three columns from the Products table, all of the columns are present and notice the rows are sorted into the order of the index, there is no reference to follow from the index back to the data.





    A clustered index is the most important index you can apply to a table. If the database engine can use a clustered index during a query, the database does not need to follow references back to the rest of\ the data, as happens with a nonclustered index. The result is less work for the database, and consequently, better performance for a query using a clustered index.



    To create a clustered index, simply select the Create As CLUSTERED checkbox in the dialog box we used at the beginning of the chapter. The SQL syntax for a clustered index simply adds a new keyword to the CREATE INDEX command, as shown below:



     CREATE CLUSTERED INDEX IDX_SupplierID ON Products(SupplierID) 


    Most of the tables in the Northwind database already have a clustered index defined on a table. Since we can only have one clustered index per table, and the Products table already has a clustered index (PK_Products) on the primary key (ProductId), the above command should generate the following error:



    Cannot create more than one clustered index on table 'Products'. 
    Drop the existing clustered index 'PK_Products' before creating another.


    As a general rule of thumb, every table should have a clustered index. If you create only one index for a table, use a clustered index. Not only is a clustered index more efficient than other indexes for retrieval operations, a clustered index also helps the database efficiently manage the space required to store the table. In SQL Server, creating a primary key constraint will automatically create a clustered index (if none exists) using the primary key column as the index key.



    Sometimes it is better to use a unique nonclustered index on the primary key column, and place the clustered index on a column used by more queries. For example, if the majority of searches are for the price of a product instead of the primary key of a product, the clustered index could be more effective if used on the price field. A clustered index can also be a UNIQUE index.



    A Disadvantage to Clustered Indexes


    If we update a record and change the value of an indexed column in a clustered index, the database might need to move the entire row into a new position to keep the rows in sorted order. This behavior essentially turns an update query into a DELETE followed by an INSERT, with an obvious decrease in performance. A table's clustered index can often be found on the primary key or a foreign key column, because key values generally do not change once a record is inserted into the database.



    Composite Indexes



    A composite index is an index on two or more columns. Both clustered and nonclustered indexes can be composite indexes. Composite indexes are especially useful in two different circumstances. First, you can use a composite index to cover a query. Secondly, you can use a composite index to help match the search criteria of specific queries. We will go onto more detail and give examples of these two areas in the following sections.



    Covering Queries with an Index


    Earlier in the article we discussed how an index, specifically a nonclustered index, contains only the key values and a reference to find the associated row of data. However, if the key value contains all of the information needed to process a query, the database never has to follow the reference and find the row; it can simply retrieve the information from the index and save processing time. This is always a benefit for clustered indexes.



    As an example, consider the index we created on the Products table for UnitPrice. The database copied the values from the UnitPrice column and sorted them into an index. If we execute the following query, the database can retrieve all of the information for the query from the index itself.



    SELECT UnitPrice FROM Products ORDER BY UnitPrice 


    We call these types of queries covered queries, because all of the columns requested in the output are contained in the index itself. A clustered index, if selected for use by the query optimizer, always covers a query, since it contains all of the data in a table.



    For the following query, there are no covering indexes on the Products table.



    SELECT ProductName, UnitPrice FROM Products ORDER BY UnitPrice 


    This is because although the database will use the index on UnitPrice to avoid sorting records, it will need to follow the reference in each index entry to find the associated row and retrieve the product name. By creating a composite index on two columns (ProductName and UnitPrice), we can cover this query with the new index.



    Matching Complex Search Criteria


    For another way to use composite indexes, let's take a look at the OrderDetails table of Northwind. There are two key values in the table (OrderID and ProductID); these are foreign keys, referencing the Orders and Products tables respectively. There is no column dedicated for use as a primary key; instead, the primary key is the combination of the columns OrderID and ProductID.



    The primary key constraint on these columns will generate a composite index, which is unique of course. The command the database would use to create the index looks something like the following:



    CREATE UNIQUE CLUSTERED INDEX PK_Order_Details 
    ON [Order Details] (OrderID, ProductID)


    The order in which columns appear in a CREATE INDEX statement is significant. The primary sort order for this index is OrderID. When the OrderID is the same for two or more records, the database will sort this subset of records on ProductID.



    The order of columns determines how useful the index is for a query. Consider the phone book sorted by last name then first name. The phone book makes it easy to find all of the listings with a last name of Smith, or all of the listings with a last name of Jones and a first name of Lisa, but it is difficult to find all listings with a first name of Gary without scanning the book page by page.



    Likewise, the composite index on Order Details is useful in the following two queries:



    SELECT * FROM [Order Details] WHERE OrderID = 11077 

    SELECT * FROM [Order Details] WHERE OrderID = 11077 AND ProductID = 13


    However, the following query cannot take advantage of the index we created since ProductID is the second part of the index key, just like the first name field in a phone book.



     SELECT * FROM [Order Details] WHERE ProductID = 13


    In this case, ProductID is a primary key, however, so an index does exist on the ProductID column for the database to use for this query.



    Suppose the following query is the most popular query executed by our application, and we decided we needed to tune the database to support it.



    SELECT ProductName, UnitPrice FROM Products ORDER BY UnitPrice 


    We could create the following index to cover the query. Notice we have specified two columns for the index: UnitPrice and ProductName (making the index a composite index):



     CREATE INDEX IX_UnitPrice_ProductName ON Products(UnitPrice, ProductName) 


    While covered queries can provide a performance benefit, remember there is a price to pay for each index we add to a table, and we can also never cover every query in a non-trivial application.



    Additional Index Guidelines



    Choosing the correct columns and types for an index is another important step in creating an effective index. In this section, we will talk about two main points, namely short index keys and selective indexes (we'll explain what selective indexes are in just a moment).



    Keep Index Keys Short


    The larger an index key is, the harder a database has to work to use the index. For instance, an integer key is smaller in size then a character field for holding 100 characters. In particular, keep clustered indexes as short as possible.



    There are several approaches to keeping an index key short. First, try to limit the index to as few columns as possible. While composite indexes are useful and can sometimes optimize a query, they are also larger and cause more disk reads for the database. Secondly, try to choose a compact data type for an index column, based on the number of bytes required for each data type. Integer keys are small and easy for the database to compare. In contrast, strings require a character-by-character comparison.



    As a rule of thumb, try to avoid using character columns in an index, particularly primary key indexes. Integer columns will always have an advantage over character fields in ability to boost the performance of a query.



    Distinct Index Keys


    The most effective indexes are the indexes with a small percentage of duplicated values. Think of having a phone book for a city where 75% of the population has the last name of Smith. A phone book in this area might be easier to use if the entries were sorted by the resident's first names instead. A good index will allow the database to disregard as many records as possible during a search.



    An index with a high percentage of unique values is a selective index. Obviously, a unique index is the most selective index of all, because there are no duplicate values. SQL Server will track statistics for indexes and will know how selective each index is. The query optimizer utilizes these statistics when selecting the best index to use for a query.



    Maintaining Indexes


    In addition to creating an index, we'll need to view existing indexes, and sometimes delete or rename them. This is part of the ongoing maintenance cycle of a database as the schema changes, or even naming conventions change.



    View Existing Indexes


    A list of all indexes on a table is available in the dialog box we used to create an index. Click on the Selected index drop down control and scroll through the available indexes.



    There is also a stored procedure named sp_helpindex. This stored procedure gives all of the indexes for a table, along with all of the relevant attributes. The only input parameter to the procedure is the name of the table, as shown below.



    EXEC sp_helpindex Customers 


    Rename an Index


    We can also rename any user created object with the sp_rename stored procedure, including indexes. The sp_rename procedure takes, at a minimum, the current name of the object and the new name for the object. For indexes, the current name must include the name of the table, a dot separator, and the name of the index, as shown below:



    EXEC sp_rename 'Products.IX_UnitPrice', 'IX_Price'


    This will change the name of the IX_UnitPrice index to IX_Price.



    Delete an Index


    It is a good idea to remove an index from the database if the index is not providing any benefit. For instance, if we know the queries in an application are no longer searching for records on a particular column, we can remove the index. Unneeded indexes only take up storage space and diminish the performance of modifications. You can remove most indexes with the Delete button on the index dialog box, which we saw earlier. The equivalent SQL command is shown below.



    DROP Index Products.IX_Price 


    Again, we need to use the name of the table and the name of the index, with a dot separator. Some indexes are not so easy to drop, namely any index supporting a unique or primary key constraint. For example, the following command tries to drop the PK_Products index of the Products table.



    DROP INDEX Products.PK_Products 


    Since the database uses PK_Products to enforce a primary key constraint on the Products table, the above command should produce the following error.



    An explicit DROP INDEX is not allowed on index 'Products.PK_Products'. 
    It is being used for PRIMARY KEY constraint enforcement.


    Removing a primary key constraint from a table is a redesign of the table, and requires careful thought. It makes sense to know the only way to achieve this task is to either drop the table and use a CREATE TABLE command to recreate the table without the index, or to use the ALTER TABLE command.



    Conclusion



    In this article we learned how to create, manage, and select indexes for SQL Server tables. Most of what we covered is true for any relational database engine. Proper indexes are crucial for good performance in large databases. Sometimes you can make up for a poorly written query with a good index, but it can be hard to make up for poor indexing with even the best queries.