database's docs

3.6.0 RAC-存储配置


1. 存储配置

接下来的存储配置会提供关于以下内容的推荐配置

  • 设置设备的多路径映射
  • 为ASM 硬盘管理使用udev规则或者oracle的ASMLib
  • 为了最佳性能而使用的tuned package

1) 设置DM-Multipath

设备多路径映射提供了聚合多个I/O路径来实现高可用,I/O负载均衡和命名持久化的能力。

注意,在进行以下步骤之前,先确保每个节点都可以访问oracle集群的共享存储。如果是虚拟机环境,请先创建共享磁盘,并挂载到每个模拟节点上

以下是安装和配置设备多路径映射设备的推荐步骤

# 安装device-mapper-multipath
yum install device-mapper-multipath

# 准备配置文件
cp /usr/share/doc/device-mapper-multipath-0.4.9/multipath.conf /etc/

# 获取本地硬盘在系统中的scsi id
scsi_id --whitelisted --replace-whitespace --device=/dev/sda
1ATA_VBOX_HARDDISK_VB542a6eee-dbfb1007


# 修改配置文件,将上面的scsi id加入黑名单
vim /etc/multipath.conf
******************************************************
# virtualbox虚拟环境中,getuid_callout字段里必须加上"--replace-whitespace"参数,因为上面得到的scsi id中的"_"是由空格替换而来的,multipath取得的wwid必须与其保持一致
defaults {
        getuid_callout          "/lib/udev/scsi_id --whitelisted --replace-whitespace --device=/dev/%n"
        user_friendly_names     yes
}

# 禁掉本地磁盘
blacklist {
       wwid 1ATA_VBOX_HARDDISK_VB542a6eee-dbfb1007
        devnode "^(ram|raw|loop|fd|md|dm-|sr|scd|st)[0-9]*"
        devnode "^hd[a-z]"
}
******************************************************

# 启动服务
service multipathd start

# 设置开机启动服务
chkconfig multipathd on

# 查看
multipath -ll
db2 (1ATA_VBOX_HARDDISK_VBa522109e-6e7ba34d) dm-3 ATA,VBOX HARDDISK
size=8.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 4:0:0:0 sdc 8:32  active ready running
db1 (1ATA_VBOX_HARDDISK_VB5fd07f22-14a65bd2) dm-2 ATA,VBOX HARDDISK
size=10G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 3:0:0:0 sdb 8:16  active ready running
redo (1ATA_VBOX_HARDDISK_VB1bf7dcf2-cc7766bf) dm-5 ATA,VBOX HARDDISK
size=8.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 6:0:0:0 sde 8:64  active ready running
fra (1ATA_VBOX_HARDDISK_VB1f2e19af-e712c25a) dm-4 ATA,VBOX HARDDISK
size=8.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 5:0:0:0 sdd 8:48  active ready running
ocrvote3 (1ATA_VBOX_HARDDISK_VBce272003-a253f798) dm-8 ATA,VBOX HARDDISK
size=1.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 9:0:0:0 sdh 8:112 active ready running
ocrvote2 (1ATA_VBOX_HARDDISK_VB96b54a9f-d8e3fcce) dm-7 ATA,VBOX HARDDISK
size=1.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 8:0:0:0 sdg 8:96  active ready running
ocrvote1 (1ATA_VBOX_HARDDISK_VBa47c63dc-5db76100) dm-6 ATA,VBOX HARDDISK
size=1.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 7:0:0:0 sdf 8:80  active ready running


# 进一步配置
vim /etc/multipath.conf
******************************************************
# 正式环境应该如此配置,但是virtualbox磁盘似乎不支持alua方式,所以在测试环境中需要把prio项注释掉
defaults {
 udev_dir /dev
 polling_interval 10
 path_selector "round-robin 0"
 path_grouping_policy multibus
 getuid_callout "/lib/udev/scsi_id --whitelisted --device=/dev/%n"
 prio alua
 path_checker readsector0
 rr_min_io 100
 max_fds 8192
 rr_weight priorities
 failback immediate
 no_path_retry fail
 user_friendly_names yes
}
# prio字段
# 作用:Specifies the default function to call to obtain a path priority value.
# Possible values include:
#     const: Set a priority of 1 to all paths.
#     emc: Generate the path priority for EMC arrays.
#     alua: Generate the path priority based on the SCSI-3 ALUA settings.
#     netapp: Generate the path priority for NetApp arrays.
#     rdac: Generate the path priority for LSI/Engenio RDAC controller.
#     hp_sw: Generate the path priority for Compaq/HP controller in active/standby mode.
#     hds: Generate the path priority for Hitachi HDS Modular storage arrays.
#     The default value is const.

## 添加以下别名
multipaths {
        multipath {
                wwid                    1ATA_VBOX_HARDDISK_VB5fd07f22-14a65bd2
                alias                   db1
        }
        multipath {
                wwid                    1ATA_VBOX_HARDDISK_VBa522109e-6e7ba34d
                alias                   db2
        }
        multipath {
                wwid                    1ATA_VBOX_HARDDISK_VB1f2e19af-e712c25a
                alias                   fra
        }
        multipath {
                wwid                    1ATA_VBOX_HARDDISK_VB1bf7dcf2-cc7766bf
                alias                   redo
        }
        multipath {
                wwid                    1ATA_VBOX_HARDDISK_VBa47c63dc-5db76100
                alias                   ocrvote1
        }
        multipath {
                wwid                    1ATA_VBOX_HARDDISK_VB96b54a9f-d8e3fcce
                alias                   ocrvote2
        }
        multipath {
                wwid                    1ATA_VBOX_HARDDISK_VBce272003-a253f798
                alias                   ocrvote3
        }
}

# 此处的wwid是需要共享的磁盘的scsi_id,一共7个磁盘
******************************************************

# 重启服务
service multipathd restart

# 查看状态
cat /etc/multipath/bindings
# Multipath bindings, Version : 1.0
# NOTE: this file is automatically maintained by the multipath program.
# You should not need to edit this file in normal circumstances.
#
# Format:
# alias wwid
#
mpatha 1ATA_VBOX_HARDDISK_VB5fd07f22-14a65bd2

multipath -ll
db2 (1ATA_VBOX_HARDDISK_VBa522109e-6e7ba34d) dm-3 ATA,VBOX HARDDISK
size=8.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 4:0:0:0 sdc 8:32  active ready running
db1 (1ATA_VBOX_HARDDISK_VB5fd07f22-14a65bd2) dm-2 ATA,VBOX HARDDISK
size=10G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 3:0:0:0 sdb 8:16  active ready running
redo (1ATA_VBOX_HARDDISK_VB1bf7dcf2-cc7766bf) dm-5 ATA,VBOX HARDDISK
size=8.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 6:0:0:0 sde 8:64  active ready running
fra (1ATA_VBOX_HARDDISK_VB1f2e19af-e712c25a) dm-4 ATA,VBOX HARDDISK
size=8.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 5:0:0:0 sdd 8:48  active ready running
ocrvote3 (1ATA_VBOX_HARDDISK_VBce272003-a253f798) dm-8 ATA,VBOX HARDDISK
size=1.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 9:0:0:0 sdh 8:112 active ready running
ocrvote2 (1ATA_VBOX_HARDDISK_VB96b54a9f-d8e3fcce) dm-7 ATA,VBOX HARDDISK
size=1.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 8:0:0:0 sdg 8:96  active ready running
ocrvote1 (1ATA_VBOX_HARDDISK_VBa47c63dc-5db76100) dm-6 ATA,VBOX HARDDISK
size=1.0G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
 `- 7:0:0:0 sdf 8:80  active ready running

2) 给共享磁盘分区

在第一个节点,给所有的共享磁盘卷分区(db1,db2,fra,redo,ocrvote1,ocrvote2,ocrvote3)

parted /dev/mapper/db1 mklabel gpt mkpart primary "1 -1"

parted /dev/mapper/db2 mklabel gpt mkpart primary "1 -1"

parted /dev/mapper/fra mklabel gpt mkpart primary "1 -1"

parted /dev/mapper/redo mklabel gpt mkpart primary "1 -1"

parted /dev/mapper/ocrvote1 mklabel gpt mkpart primary "1 -1"

parted /dev/mapper/ocrvote2 mklabel gpt mkpart primary "1 -1"

parted /dev/mapper/ocrvote3 mklabel gpt mkpart primary "1 -1"

检查分区后的磁盘,看名称后是否增加了p1

ll /dev/mapper/[dfor]*
lrwxrwxrwx 1 root root 7 Dec 26 08:51 /dev/mapper/db1 -> ../dm-2
lrwxrwxrwx 1 root root 7 Dec 26 08:52 /dev/mapper/db2 -> ../dm-3
lrwxrwxrwx 1 root root 7 Dec 26 08:52 /dev/mapper/db2p1 -> ../dm-9
lrwxrwxrwx 1 root root 7 Dec 26 08:52 /dev/mapper/fra -> ../dm-4
lrwxrwxrwx 1 root root 8 Dec 26 08:52 /dev/mapper/frap1 -> ../dm-10
lrwxrwxrwx 1 root root 7 Dec 26 08:52 /dev/mapper/ocrvote1 -> ../dm-6
lrwxrwxrwx 1 root root 8 Dec 26 08:52 /dev/mapper/ocrvote1p1 -> ../dm-12
lrwxrwxrwx 1 root root 7 Dec 26 08:52 /dev/mapper/ocrvote2 -> ../dm-7
lrwxrwxrwx 1 root root 8 Dec 26 08:52 /dev/mapper/ocrvote2p1 -> ../dm-13
lrwxrwxrwx 1 root root 7 Dec 26 08:52 /dev/mapper/ocrvote3 -> ../dm-8
lrwxrwxrwx 1 root root 8 Dec 26 08:52 /dev/mapper/ocrvote3p1 -> ../dm-14
lrwxrwxrwx 1 root root 7 Dec 26 08:52 /dev/mapper/redo -> ../dm-5
lrwxrwxrwx 1 root root 8 Dec 26 08:52 /dev/mapper/redop1 -> ../dm-11

若有磁盘的名称后没有增加p1(例如上面的输出中缺少db1p1),需要对该磁盘执行以下命令

kpartx -a /dev/mapper/db1
# 如果执行完命令还是没有出现db1p1,需要重启系统

在其他节点执行以下命令

# 依次执行(db1,db2,fra,redo,ocrvote1,ocrvote2,ocrvote3)
kpartx -a /dev/mapper/db1
# 如果执行完命令还是没有出现db1p1,需要重启系统

3) 配置oracle ASM硬盘

配置oracle ASM 需要udev rule或者ASMLib。
Oracle的ASMLib是一个可选的实用程序,用于管理和帮助用户使用Oracle ASM设备,并且不是Oracle ASM磁盘正常运行所必需的。此外,Oracle ASMLib对Oracle数据库性能没有任何影响,但它需要一个名为kmod-oracleasm的内核模块以及专有的用户空间实用程序才能正常工作。
udev规则代表了Oracle ASMLib的替代方法,不需要额外的内核模块,因此在Linux系统上保持了更小的占用空间。
只能同时选用其中一种方法,此处使用udev 。

  1. 在第一个节点,使用root身份,为每个映射的硬盘卷识别Device Mapper Universally Unique IDentifier (DM_UUID)。

    for i in ocrvote1p1 ocrvote2p1 ocrvote3p1 db1p1 db2p1 frap1 redop1; do printf "%s %s\n" "$i" "$(udevadm info --query=all --name=/dev/mapper/$i | grep -i dm_uuid)"; done
    ocrvote1p1 E: DM_UUID=part1-mpath-1ATA_VBOX_HARDDISK_VBa47c63dc-5db76100
    ocrvote2p1 E: DM_UUID=part1-mpath-1ATA_VBOX_HARDDISK_VB96b54a9f-d8e3fcce
    ocrvote3p1 E: DM_UUID=part1-mpath-1ATA_VBOX_HARDDISK_VBce272003-a253f798
    db1p1 E: DM_UUID=part1-mpath-1ATA_VBOX_HARDDISK_VB5fd07f22-14a65bd2
    db2p1 E: DM_UUID=part1-mpath-1ATA_VBOX_HARDDISK_VBa522109e-6e7ba34d
    frap1 E: DM_UUID=part1-mpath-1ATA_VBOX_HARDDISK_VB1f2e19af-e712c25a
    redop1 E: DM_UUID=part1-mpath-1ATA_VBOX_HARDDISK_VB1bf7dcf2-cc7766bf
    
  2. 在/etc/udev/rules.d中创建99-oracle-asmdevices.rules

``` bash
vim /etc/udev/rules.d/99-oracle-asmdevices.rules
******************************************************
KERNEL=="dm-*",ENV{DM_UUID}=="part1-mpath-1ATA_VBOX_HARDDISK_VBa47c63dc-5db76100",OWNER="grid",GROUP="asmadmin",MODE="0660"
KERNEL=="dm-*",ENV{DM_UUID}=="part1-mpath-1ATA_VBOX_HARDDISK_VB96b54a9f-d8e3fcce",OWNER="grid",GROUP="asmadmin",MODE="0660"
KERNEL=="dm-*",ENV{DM_UUID}=="part1-mpath-1ATA_VBOX_HARDDISK_VBce272003-a253f798",OWNER="grid",GROUP="asmadmin",MODE="0660"
KERNEL=="dm-*",ENV{DM_UUID}=="part1-mpath-1ATA_VBOX_HARDDISK_VB5fd07f22-14a65bd2",OWNER="grid",GROUP="asmadmin",MODE="0660"
KERNEL=="dm-*",ENV{DM_UUID}=="part1-mpath-1ATA_VBOX_HARDDISK_VBa522109e-6e7ba34d",OWNER="grid",GROUP="asmadmin",MODE="0660"
KERNEL=="dm-*",ENV{DM_UUID}=="part1-mpath-1ATA_VBOX_HARDDISK_VB1f2e19af-e712c25a",OWNER="grid",GROUP="asmadmin",MODE="0660"
KERNEL=="dm-*",ENV{DM_UUID}=="part1-mpath-1ATA_VBOX_HARDDISK_VB1bf7dcf2-cc7766bf",OWNER="grid",GROUP="asmadmin",MODE="0660"
******************************************************
```


含义是,如果任何`dm-*`设备匹配到了DM_UUID,则将其owner设置为grid用户,将其group设置为asmadmin组,将其权限修改为0660.

3. 拷贝`99-oracle-asmdevices.rules`到集群中所有节点
``` bash
scp /etc/udev/rules.d/99-oracle-asmdevices.rules db-oracle-node2:/etc/udev/rules.d/
```

  1. 在每个节点上确认分区存在

    for i in db1p1 db2p1 frap1 redop1 ocrvote1p1 ocrvote2p1 ocrvote3p1; do printf "%s %s\n" "$i" "$(ls -ll /dev/mapper/$i)"; done
    db1p1 lrwxrwxrwx 1 root root 8 Dec 26 09:10 /dev/mapper/db1p1 -> ../dm-15
    db2p1 lrwxrwxrwx 1 root root 8 Dec 26 09:05 /dev/mapper/db2p1 -> ../dm-12
    frap1 lrwxrwxrwx 1 root root 8 Dec 26 09:05 /dev/mapper/frap1 -> ../dm-13
    redop1 lrwxrwxrwx 1 root root 7 Dec 26 09:05 /dev/mapper/redop1 -> ../dm-9
    ocrvote1p1 lrwxrwxrwx 1 root root 8 Dec 26 09:05 /dev/mapper/ocrvote1p1 -> ../dm-11
    ocrvote2p1 lrwxrwxrwx 1 root root 8 Dec 26 09:05 /dev/mapper/ocrvote2p1 -> ../dm-10
    ocrvote3p1 lrwxrwxrwx 1 root root 8 Dec 26 09:05 /dev/mapper/ocrvote3p1 -> ../dm-14
    
  2. 在每个节点上确认udev rules

``` bash
# 根据上面分区对应的dm-x来确定执行命令的对象
udevadm test /sys/block/dm-9
udevadm test /sys/block/dm-10
udevadm test /sys/block/dm-11
udevadm test /sys/block/dm-12
udevadm test /sys/block/dm-13
udevadm test /sys/block/dm-14
udevadm test /sys/block/dm-15
```

检测结果

``` bash
ll /dev/dm-*|grep grid
brw-rw----. 1 grid asmadmin 253, 10 Dec 26 09:35 /dev/dm-10
brw-rw----. 1 grid asmadmin 253, 11 Dec 26 09:35 /dev/dm-11
brw-rw----. 1 grid asmadmin 253, 12 Dec 26 09:35 /dev/dm-12
brw-rw----. 1 grid asmadmin 253, 13 Dec 26 09:35 /dev/dm-13
brw-rw----. 1 grid asmadmin 253, 14 Dec 26 09:35 /dev/dm-14
brw-rw----. 1 grid asmadmin 253, 15 Dec 26 09:35 /dev/dm-15
brw-rw----. 1 grid asmadmin 253, 9 Dec 26 09:34 /dev/dm-9
```


#### 4) 使用Automatic System Tuning来优化数据库存储
建议使用Red Hat Enterprise Linux 6中的tuned软件包,它通过配置文件(profile)自动调整系统以适应常见工作负载,每个配置文件针对不同的工作负载场景进行了调整,例如
- enterprise-storage
- power savings
- high network throughput

建议创建一个内容与enterprise-storage profile相同的自定义配置文件,但禁用Transparent HugePages (THP)   
在每一个集群内节点上使用root执行以下操作
``` bash
# 安装tuned
yum install tuned
# 开机启动设置
chkconfig tuned on
# 启动服务
service tuned start

# 拷贝一个自定义配置文件
cd /etc/tune-profiles/
cp -r enterprise-storage enterprise-storage-no-thp

# 禁用thp
vim /etc/tune-profiles/enterprise-storage-no-thp/ktune.sh
******************************************************
# 修改 set_transparent_hugepages always为
set_transparent_hugepages never
******************************************************

# 选定自定义的配置文件
tuned-adm profile enterprise-storage-no-thp
Stopping tuned:                                            [  OK  ]
Switching to profile 'enterprise-storage-no-thp'
Applying deadline elevator: dm-0 dm-1 dm-10 dm-11 dm-12 dm-13 dm-14 dm-15 dm-2 dm-3 dm-4 dm-5 dm-6 dm-7 dm-8 dm-9 sda sdb sdc sdd sde sdf sdg s[  OK  ]
Applying ktune sysctl settings:
/etc/ktune.d/tunedadm.conf:                                [  OK  ]
Calling '/etc/ktune.d/tunedadm.sh start':                  [  OK  ]
Applying sysctl settings from /etc/sysctl.conf
Starting tuned:                                            [  OK  ]

# 检查thp是否禁用
cat /sys/kernel/mm/redhat_transparent_hugepage/enabled
always madvise [never]