Understand how Linux containers work with practical examples

Linux Namespaces

Linux control groups (cgroups)

Container Fundamentals (key technologies)

Process namespace fundamentals

$ lsns -t pid
$ echo $$ # parent PID
$ unshare --fork --pid --mount-proc zsh
$ sleep 300 &
$ sleep 300 &
$ sleep 300 &
$ sleep 300 &
$ sleep 300 &
$ top
$ ps f -g <PPID>
$ lsns -t pid

Filesystem — Overlay FS fundamentals

$ cd /tmp
$ mkdir {lower1,lower2,upper,work,merged}
$ echo "Lower 1 - original" > lower1/file1.txt
$ echo "Lower 2 - original" > lower2/file2.txt
$ mount -t overlay -o lowerdir=/tmp/lower1:/tmp/lower2,upperdir=/tmp/upper,workdir=/tmp/work none /tmp/merged
$ cd /tmp/merged
$ echo "file created in merged directory" > file_created.txt
$ echo "file 1 modified" > file1.txt
$ cd /tmp
$ umount /tmp/merged
$ find -name '*.txt' -type f 2>/dev/null | while read fn; do echo ">> cat $fn"; cat $fn; done

Networking — Linux bridge fundamentals

$ ip link add br-net type bridge
$ ip link
$ ip addr add 192.168.55.1/24 brd + dev br-net
$ ip link set br-net up
$ ip netns add ns1
$ ip netns add ns2
$ ip link add veth-ns1 type veth peer name br-ns1
$ ip link add veth-ns2 type veth peer name br-ns2
$ ip link set veth-ns1 netns ns1
$ ip link set veth-ns2 netns ns2
$ ip link set br-ns1 master br-net
$ ip link set br-ns2 master br-net
$ ip -n ns1 addr add 192.168.55.2/24 dev veth-ns1
$ ip -n ns2 addr add 192.168.55.3/24 dev veth-ns2
$ ip -n ns1 link set veth-ns1 up
$ ip -n ns2 link set veth-ns2 up
$ ip link set dev br-ns1 up
$ ip link set dev br-ns2 up
$ ip -n ns1 route add default via 192.168.55.1 dev veth-ns1 
$ ip -n ns2 route add default via 192.168.55.1 dev veth-ns2
$ sysctl -w net.ipv4.ip_forward=1
$ iptables -t nat -A POSTROUTING -s 192.168.55.0/24 ! -o br-net -j MASQUERADE

Control groups (cgroups) fundamentals

$ mkdir -p /mycg/{memory,cpusets,cpu}
$ mount -t cgroup -o memory none /mycg/memory
$ mount -t cgroup -o cpu,cpuacct none /mycg/cpu
$ mount -t cgroup -o cpuset none /mycg/cpusets
$ mkdir -p /mycg/cpu/user{1..3}
# 2048 / (2048 + 512 + 80) = 77%
$ echo 2048 > /mycg/cpu/user1/cpu.shares
# 512 / (2048 + 512 + 80) = 19%
$ echo 512 > /mycg/cpu/user2/cpu.shares
# 80 / (2048 + 512 + 80) = 3%
$ echo 80 > /mycg/cpu/user3/cpu.shares
$ cat /dev/urandom &> /dev/null &
$ PID1=$!
$ cat /dev/urandom &> /dev/null &
$ PID2=$!
$ cat /dev/urandom &> /dev/null &
$ PID3=$!
$ echo $PID1 > /mycg/cpu/user1/tasks
$ echo $PID2 > /mycg/cpu/user2/tasks
$ echo $PID3 > /mycg/cpu/user3/tasks
$ top

Create a container from scratch

$ docker pull debian
$ docker save debian -o debian.tar
$ mkdir debian_layer
$ mkdir -p fs/{lower,upper,work,merged}
$ tar xf debian.tar -C debian_layer
$ find debian_layer -name 'layer.tar' -exec tar xf {} -C fs/lower \;
$ ip netns add cnt
$ ip link add br-cnt type bridge
$ ip addr add 192.168.22.1/24 brd + dev br-cnt
$ ip link set br-cnt up
$ sysctl -w net.ipv4.ip_forward=1
$ iptables -t nat -I POSTROUTING 1 -s 192.168.22.0/24 ! -o br-cnt -j MASQUERADE
$ mount -vt overlay -o lowerdir=./fs/lower,upperdir=./fs/upper,workdir=./fs/work none ./fs/merged
$ mount -v --bind /dev ./fs/merged/dev
$ unshare --fork --pid --net=/var/run/netns/cnt chroot ./fs/merged \
/usr/bin/env -i PATH=/bin:/usr/bin:/sbin:/usr/sbin TERM="$TERM" \
/bin/bash --login +h
# Mount proc within container
$ mount -vt proc proc /proc
$ ip link add veth-cnt type veth peer name br-veth-cnt
$ ip link set veth-cnt netns cnt
$ ip link set br-veth-cnt master br-cnt
$ ip link set br-veth-cnt up
$ ip -n cnt addr add 192.168.22.2/24 dev veth-cnt
$ ip -n cnt link set lo up
$ ip -n cnt link set veth-cnt up
$ ip -n cnt route add default via 192.168.22.1 dev veth-cnt
$ ip netns exec cnt ping -c 3 1.1.1.1
$ mkdir /sys/fs/cgroup/memory/cnt
$ echo 10000000 > /sys/fs/cgroup/memory/cnt/memory.limit_in_bytes
$ echo 0 > /sys/fs/cgroup/memory/cnt/memory.swappiness
$ CHILD_PID=$(lsns -t pid | grep "[/]bin/bash --login +h" | awk '{print $4}')
$ echo $CHILD_PID > /sys/fs/cgroup/memory/cnt/tasks
$ apt update
$ apt install nginx procps curl -y
$ nginx
$ curl 127.0.0.1:80
$ curl 192.168.22.2:80 # from host
$ cat <( </dev/zero head -c 15m) <(sleep 15) | tail
$ umount /proc # within container
$ exit # within container
$ umount -R ./fs/merged
$ ip link del br-veth-cnt
$ ip link del br-cnt
$ ip netns del cnt # grep cnt /proc/mounts

Inspect Namespaces within a docker container

Install docker CE

$ curl -fsSL https://get.docker.com -o install_docker.sh
$ less install_docker.sh # optional
$ sh install_docker.sh
$ usermod -aG docker $USER
$ newgrp docker # Or logout and login

Inspect Docker Network

$ docker network create mynet
$ BR_NAME=$(ip link | grep -v '@' | awk '/br-/{gsub(":",""); print $2}')
$ ip addr show ${BR_NAME}
$ docker network inspect mynet | grep Subnet
$ docker run --name nginx --net mynet -d --rm -p 8080:80 nginx
$ CONTAINER_ID=$(docker container ps | awk '/nginx/{print $1}')
$ CONTAINER_PID=$(docker inspect -f '{{.State.Pid}}' ${CONTAINER_ID})
$ mkdir -p /var/run/netns/
$ ln -sfT /proc/${CONTAINER_PID}/ns/net /var/run/netns/${CONTAINER_ID}
$ ip netns list
$ ip -n ${CONTAINER_ID} link show eth0
$ ip -n ${CONTAINER_ID} addr show eth0
$ docker container inspect nginx | grep IPAddress
$ iptables -t nat -nvL

Inspect cgroups in a docker container

$ docker run --name test_cg --memory=10m --cpus=.1 -it --rm ubuntu
$ CONTAINER_ID=$(docker container ps --no-trunc | awk '/test_cg/{print $1}')
$ tree /sys/fs/cgroup/{memory,cpu}/docker/${CONTAINER_ID}
$ docker container top test_cg | tail -n 1 | awk '{print $2}' # container parent PID
$ cat /sys/fs/cgroup/{memory,cpu}/docker/${CONTAINER_ID}/tasks # the same as container parent PID
$ docker container stats test_cg
$ cat /dev/urandom &> /dev/null
$ cat <( </dev/zero head -c 50m) <(sleep 30) | tail

Inspect overlay fs in a docker container

$ docker run --name test_overlayfs -it --rm debian
$ docker container inspect test_overlayfs -f '{{.GraphDriver.Data.LowerDir}}' | awk 'BEGIN{FS=":"}{for (i=1; i<= NF; i++) print $i}' | while read low; do tree -L 2 $low; done | less
$ docker container inspect test_overlayfs -f '{{.GraphDriver.Data.UpperDir}}' | while read upper; do tree $upper; done | less
$ apt update && apt install nmap -y
$ docker container inspect test_overlayfs -f '{{.GraphDriver.Data.UpperDir}}' | while read upper; do tree $upper; done | less

Inspect docker process namespace

$ docker run --name test_ps -it --rm ubuntu
$ sleep 600 &
$ sleep 600 &
$ sleep 600 &
$ sleep 600 &
$ sleep 600 &
$ top
$ CONTAINER_PID=$(docker container top test_ps | sed -n '2p' | awk '{print $2}')
$ ps f -g ${CONTAINER_PID}
$ lsns -t pid
$ docker container top test_ps

Conclusion

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Ivan Moreno

Ivan Moreno

Engineer || MSc student || DevOps in progress