Wahyu Pratama: January 2013

Monday, January 14, 2013

Trying ZFS dedup in FreeBSD 9.1 R

I am researching ZFS deduplication running on FreeBSD.

In computing, data deduplication is a specialized data compression technique for eliminating duplicate copies of repeating data. Related and somewhat synonymous terms are intelligent (data) compression and single-instance (data) storage. The technique is used to improve storage utilization and can also be applied to network data transfers to reduce the number of bytes that must be sent. In the deduplication process, unique chunks of data, or byte patterns, are identified and stored during a process of analysis. As the analysis continues, other chunks are compared to the stored copy and whenever a match occurs, the redundant chunk is replaced with a small reference that points to the stored chunk. Given that the same byte pattern may occur dozens, hundreds, or even thousands of times (the match frequency is dependent on the chunk size), the amount of data that must be stored or transferred can be greatly reduced.
http://en.wikipedia.org/wiki/Data_deduplication

My box is running FreeBSD 9.1 R.

root@skyline:/root # uname -v
FreeBSD 9.1-RELEASE #0 r243825: Tue Dec 4 09:23:10 UTC 2012     root@farrell.cse.buffalo.edu:/usr/obj/usr/src/sys/GENERIC
root@skyline:/root # zpool list
NAME    SIZE ALLOC   FREE    CAP DEDUP HEALTH ALTROOT
zroot 1.81T 1.49G 1.81T     0% 1.00x ONLINE -
root@skyline:/root #
root@skyline:/root # zfs get all zroot

NAME    PROPERTY    VALUE    SOURCE

zroot    type    filesystem    -

zroot    creation    Fri    Jan    11    16:52    2013

zroot    used    3.55G    -

zroot    available    1.78T    -

zroot    referenced    373M    -

zroot    compressratio    1.50x    -

zroot    mounted    yes    -

zroot    quota    none    default

zroot    reservation    none    default

zroot    recordsize    128K    default

zroot    mountpoint    legacy    local

zroot    sharenfs    off    default

zroot    checksum    fletcher4    local

zroot    compression    off    default

zroot    atime    on    default

zroot    devices    on    default

zroot    exec    on    default

zroot    setuid    on    default

zroot    readonly    off    default

zroot    jailed    off    default

zroot    snapdir    hidden    default

zroot    aclmode    discard    default

zroot    aclinherit    restricted    default

zroot    canmount    on    default

zroot    xattr    off    temporary

zroot    copies    1    default

zroot    version    5    -

zroot    utf8only    off    -

zroot    normalization    none    -

zroot    casesensitivity    sensitive    -

zroot    vscan    off    default

zroot    nbmand    off    default

zroot    sharesmb    off    default

zroot    refquota    none    default

zroot    refreservation    none    default

zroot    primarycache    all    default

zroot    secondarycache    all    default

zroot    usedbysnapshots    0    -

zroot    usedbydataset    373M    -

zroot    usedbychildren    3.19G    -

zroot    usedbyrefreservation    0    -

zroot    logbias    latency    default

zroot    dedup    off    default

zroot    mlslabel    -

zroot    sync    standard    default

zroot    refcompressratio    1.00x    -

zroot    written    373M    -
root@skyline:/root #

Because the dedup property is off, I have to turn it on.

root@skyline:/root # zfs set dedup=on zroot
root@skyline:/root # zfs get compression,dedup zroot
NAME   PROPERTY     VALUE          SOURCE
zroot compression off            local
zroot dedup        on             local
root@skyline:/root #

From that output, dedup must now be on. Next I run a simulation to verify that dedup is working.

root@skyline:/ # cd /home/
root@skyline:/home # ls
root@skyline:/home # mkdir Testdedup1
root@skyline:/home # mkdir Testdedup2
root@skyline:/home # mkdir Testdedup3
root@skyline:/home #

root@skyline:/home # du -hs Testdedup*
1.5k    Testdedup1
1.5k    Testdedup2
1.5k    Testdedup3

root@skyline:/home # df -h
Filesystem         Size    Used   Avail Capacity Mounted on

zroot              1.8T    372M    1.8T     0%    /
devfs              1.0k    1.0k      0B   100%    /dev
zroot/tmp          1.8T     35k    1.8T     0%    /tmp
zroot/usr          1.8T    377M    1.8T     0%    /usr
zroot/usr/ports    1.8T    406M    1.8T     0%    /usr/ports
zroot/usr/src      1.8T    358M    1.8T     0%    /usr/src
zroot/var          1.8T    6.3M    1.8T     0%    /var
zroot/var/empty    1.8T     31k    1.8T     0%    /var/empty
zroot/var/run      1.8T     59k    1.8T     0%    /var/run
zroot/var/tmp      1.8T     32k    1.8T     0%    /var/tmp
root@skyline:/home # du -hs /home
6.0k    /home
root@skyline:/home #

root@skyline:/root # zpool list
NAME    SIZE ALLOC   FREE    CAP DEDUP HEALTH ALTROOT
zroot 1.81T 1.49G 1.81T     0% 1.00x ONLINE -
root@skyline:/root #

As we can see, ALLOC (the space already allocated in the pool) is 1.49 GB. I will copy a 2.2 GB file into Testdedup1 on zroot, and then copy it into the other Testdedup folders.

root@skyline:/root # zpool list
NAME    SIZE ALLOC   FREE    CAP DEDUP HEALTH ALTROOT
zroot 1.81T 3.61G 1.81T     0% 1.00x ONLINE -
root@skyline:/root #

root@skyline:/home # du -hs *
2.1G    Testdedup1
1.5k    Testdedup2
1.5k    Testdedup3
root@skyline:/home #

root@skyline:/home # cp Testdedup1/FreeBSD-8.2-RELEASE-i386-dvd1.iso Testdedup2/
root@skyline:/home # du -hs *
2.1G    Testdedup1
2.1G    Testdedup2
1.5k    Testdedup3

root@skyline:/home # zpool list
NAME    SIZE ALLOC   FREE    CAP DEDUP HEALTH ALTROOT
zroot 1.81T 3.62G 1.81T     0% 2.00x ONLINE -
root@skyline:/home #

I copy the file to another folder, Testdedup3.

cp Testdedup1/FreeBSD-8.2-RELEASE-i386-dvd1.iso Testdedup3/
root@skyline:/home # du -hs *
2.1G    Testdedup1
2.1G    Testdedup2
2.1G    Testdedup3
root@skyline:/home #

What if I rename the source file to FreeBSD-8.2-RELEASE-i386-dvd1.iso.renamefile?

root@skyline:/home # mv Testdedup1/FreeBSD-8.2-RELEASE-i386-dvd1.iso Testdedup1/FreeBSD-8.2-RELEASE-i386-dvd1.iso.renamefile
root@skyline:/home # ls
TestRenameDedup1        Testdedup1              Testdedup2              Testdedup3
root@skyline:/home # du -hs *
1.5k    TestRenameDedup1
2.1G    Testdedup1
2.1G    Testdedup2
2.1G    Testdedup3
root@skyline:/home # cp Testdedup1/FreeBSD-8.2-RELEASE-i386-dvd1.iso.renamefile TestRenameDedup1/
root@skyline:/home # du -hs *
2.1G    TestRenameDedup1
2.1G    Testdedup1
2.1G    Testdedup2
2.1G    Testdedup3
root@skyline:/home #

root@skyline:/home # ls -al TestRenameDedup1/
total 2225265
drwxr-xr-x 2 root wheel           3 Jan 14 11:43 .
drwxr-xr-x 6 root wheel           6 Jan 14 11:42 ..
-rw-r--r-- 1 root wheel 2276931584 Jan 14 11:44 FreeBSD-8.2-RELEASE-i386-dvd1.iso.renamefile
root@skyline:/home #

root@skyline:/home # zpool list
NAME    SIZE ALLOC   FREE    CAP DEDUP HEALTH ALTROOT
zroot 1.81T 3.62G 1.81T     0% 4.00x ONLINE -
root@skyline:/home #

As we can see, ALLOC is still 3.62 GB even though four copies of the file now exist (the DEDUP ratio is 4.00x).

Note:
Here is some of the explanation from man zpool.

alloc        Amount of storage space within the pool that has been
             physically allocated.
capacity     Percentage of pool space used. This property can also be
             referred to by its shortened column name, "cap".
dedupratio   The deduplication ratio specified for a pool, expressed as a
             multiplier. For example, a value of 1.76 indicates that 1.76
             units of data were stored but only 1 unit of disk space was
             actually consumed. See zfs(8) for a description of the
             deduplication feature.
free         Number of blocks within the pool that are not allocated.
size         Total size of the storage pool.

Friday, January 4, 2013

Data Center Basic, a brief

Today, ICT (Information and Communication Technology) is a basic need for some people, especially in the big cities of the world. As we can see, technology is growing very fast. Internet connections and telecommunications now never sleep. Speaking from my own experience: in 1995 I first browsed the web using a 28 kbps modem, and today the internet connection in my house is 1 Mbps, while content has shifted from text and news to streaming. That is why the amount of data is growing exponentially.
The data center is vital; everything from its design and architecture to its requirements must be carefully calculated.

A data center or computer centre (also datacenter) is a facility used to house computer systems and associated components, such as telecommunications and storage systems. It generally includes redundant or backup power supplies, redundant data communications connections, environmental controls (e.g., air conditioning, fire suppression) and security devices.

http://en.wikipedia.org/wiki/Data_center
Availability, in other words, means no downtime, whether planned or unplanned.

Availability %	Downtime per year	Downtime per month*	Downtime per week
90% ("one nine")	36.5 days	72 hours	16.8 hours
95%	18.25 days	36 hours	8.4 hours
97%	10.96 days	21.6 hours	5.04 hours
98%	7.30 days	14.4 hours	3.36 hours
99% ("two nines")	3.65 days	7.20 hours	1.68 hours
99.5%	1.83 days	3.60 hours	50.4 minutes
99.8%	17.52 hours	86.23 minutes	20.16 minutes
99.9% ("three nines")	8.76 hours	43.8 minutes	10.1 minutes
99.95%	4.38 hours	21.56 minutes	5.04 minutes
99.99% ("four nines")	52.56 minutes	4.32 minutes	1.01 minutes
99.999% ("five nines")	5.26 minutes	25.9 seconds	6.05 seconds
99.9999% ("six nines")	31.5 seconds	2.59 seconds	0.605 seconds
99.99999% ("seven nines")	3.15 seconds	0.259 seconds	0.0605 seconds

http://en.wikipedia.org/wiki/High_availability

The table above shows the total allowable downtime, planned and unplanned combined, for each availability level. The target level is chosen based on the SLA/SR.
In real life, downtime can be caused by several factors, for example power blackouts, UPS failure, wrong calculations, hardware failure, software bugs, and human error or unskilled operators.

ICT, especially at enterprise or medium scale, must be managed and organized correctly. Strategy, design, transition, operation, and continual service improvement should be implemented correctly (based on ITIL). Yes, it is not easy, but that does not mean it is impossible.

Downtime caused by an "IT outage" results in lost revenue. We can calculate the cost using the equations below.

Quantify Lost Revenue Costs

Revenue Cost = (Revenue / Annual Hours) X Impact X Outage Hours

Revenue = Gross annual revenue
Annual Hours = Total annual business hours
Impact = Percentage impact (e.g. % reduction in transactions or dollars during outage)
Outage Hours = Number of hours of outage

Quantify Lost Labor Costs

Labor Cost = People X Impact X Rate X Hours

People = Number of workers affected
Impact = Avg. % of work they could not perform
Rate = Average employee cost per hour
Hours = Number of hours of outage

http://www.firescope.com/QuickStart/Unify/Article.asp?ContentID=15

Well, even though I work in a mining company, which is really different from retail, banking, and so on, I believe this calculation is close to the real cost.

Solaris fails to boot, stuck in GRUB

On December 10, 2012, my file server failed for an unknown reason. When booting, the system got stuck at the GRUB command prompt with the following error.

"Error 15: File not found"

System information after repairing GRUB:
[root@northern ~]#uname -a
SunOS northern.myserver.com 5.10 Generic_141445-09 i86pc i386 i86pc
[root@northern ~]#
[root@northern ~]#isainfo -kv
64-bit amd64 kernel modules
[root@northern ~]#

I got the idea and reference from the following link:

https://forums.oracle.com/forums/thread.jspa?messageID=8220143

After making a plan (because this zpool contains about 1.7 TB of data), I decided to try repairing the boot loader.

Here are my steps:
1. Boot from the Solaris disc.
2. Choose the "Single User Shell".
3. It will search for installed OS instances ...
4. An OS instance was found and listed,
something like this:

1 zfs-RAID-5.0:DiskID ROOT/s10x_u8wos_08a

5. Try to mount the ZFS pool. Choose the number
6. cd /a
7. Make sure the drive can be read, try with ls command
8. cd /a/boot/grub
9. installgrub stage1 stage2 /dev/rdsk/c0t0d0s0

Reboot the system, and the system is UP again.

I then had to check the zpool health; unfortunately, the zpool is degraded.

[root@northern rdsk]#zpool list
NAME           SIZE   USED AVAIL    CAP HEALTH ALTROOT
zfs-RAID-5.0 1.81T 1.77T 48.4G    97% DEGRADED -
[root@northern rdsk]#
[root@northern rdsk]#zpool status
pool: zfs-RAID-5.0
state: DEGRADED
status: One or more devices has experienced an error resulting in data
        corruption. Applications may be affected.
action: Restore the file in question if possible. Otherwise restore the
        entire pool from backup.
   see: http://www.sun.com/msg/ZFS-8000-8A
scrub: none requested
config:

        NAME        STATE     READ WRITE CKSUM
        zfs-RAID-5.0 DEGRADED     0     0    14
          c0t0d0s0 DEGRADED     0     0    28 too many errors

errors: 1 data errors, use '-v' for a list
[root@northern rdsk]#

This means that the disk is degraded.

For disaster recovery, always make a backup and a secondary backup. Prepare disaster recovery scenarios, run drills with the operators, and so on.

Wednesday, January 2, 2013

Nginx Configuration example

Here is an example nginx configuration that serves several applications: Roundcube, Drupal, and Postfix Admin.

server {
listen 80 ;
listen 443 ssl;

#server_name localhost;
server_name myserver.com;

ssl_certificate server.crt;
ssl_certificate_key server.key;

ssl_session_timeout 5m;

ssl_protocols SSLv2 SSLv3 TLSv1;
ssl_ciphers HIGH:!aNULL:!MD5;
ssl_prefer_server_ciphers on;
#charset koi8-r;

#access_log logs/host.access.log main;

location / {
root /usr/local/www/nginx;
index index.html index.htm index.php;
}

#error_page 404 /404.html;

# redirect server error pages to the static page /50x.html
#
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root /usr/local/www/nginx-dist;
}

# proxy the PHP scripts to Apache listening on 127.0.0.1:80
#
#location ~ \.php$ {
# proxy_pass http://127.0.0.1;
#}

# pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
#
location ~ \.php$ {
root html;
fastcgi_pass 127.0.0.1:9000;
fastcgi_index index.php;
fastcgi_param SCRIPT_FILENAME /usr/local/www/$fastcgi_script_name;
include fastcgi_params;
}

# Alias Directory

location /roundcube {
alias //usr/local/www/roundcube; # Roundcube directory
#deny all;
allow 192.168.7.0/24;
deny all;
index index.php;
}

location ~ /roundcube/.*\.php$ {
if ($fastcgi_script_name ~ /roundcube(/.*\.php)$) {
set $valid_fastcgi_script_name $1;
}

fastcgi_pass 127.0.0.1:9000;
fastcgi_index index.php;
fastcgi_param SCRIPT_FILENAME /usr/local/www/roundcube$valid_fastcgi_script_name;
include fastcgi_params;
}

location /postfixadmin {
alias //usr/local/www/postfixadmin; # Postifx Directory
#deny all;
allow 192.168.7.0/24;
deny all;
index index.php index.html index.htm;
}

location ~ /postfixadmin/.*\.php$ {
if ($fastcgi_script_name ~ /postfixadmin(/.*\.php)$) {
set $valid_fastcgi_script_name $1;
}

fastcgi_pass 127.0.0.1:9000;
fastcgi_index index.php;
fastcgi_param SCRIPT_FILENAME /usr/local/www/postfixadmin$valid_fastcgi_script_name;
include fastcgi_params;
}

location /portal {
alias //usr/local/www/portal; # Drupal directory
#deny all;
# allow 192.168.7.0/24;
allow all;
index index.php index.html index.htm;
}

location ~ /portal/.*\.php$ {
if ($fastcgi_script_name ~ /portal(/.*\.php)$) {
set $valid_fastcgi_script_name $1;
}

fastcgi_pass 127.0.0.1:9000;
fastcgi_index index.php;
fastcgi_param SCRIPT_FILENAME /usr/local/www/portal$valid_fastcgi_script_name;
include fastcgi_params;
}
# End of alias directory
...