Understand how linux containers works with practical examples

Linux Namespaces

Linux control groups (cgroups)

Container Fundamentals (key technologies)

Process namespace fundamentals

$ lsns -t pid
$ echo $$ # parent PID
$ unshare --fork --pid --mount-proc zsh
$ sleep 300 &
$ sleep 300 &
$ sleep 300 &
$ sleep 300 &
$ sleep 300 &
$ top
$ ps f -g <PPID>
$ lsns -t pid

Filesystem — Overlay FS fundamentals

$ cd /tmp
$ mkdir {lower1,lower2,upper,work,merged}
$ echo "Lower 1 - original" > lower1/file1.txt
$ echo "Lower 2 - original" > lower2/file2.txt
$ mount -t overlay -o lowerdir=/tmp/lower1:/tmp/lower2,upperdir=/tmp/upper,workdir=/tmp/work none /tmp/merged
$ cd /tmp/merged
$ echo "file created in merged directory" > file_created.txt
$ echo "file 1 modified" > file1.txt
$ cd /tmp
$ umount /tmp/merged
$ find -name '*.txt' -type f 2>/dev/null | while read fn; do echo ">> cat $fn"; cat $fn; done

Networking — Linux bridge fundamentals

$ ip link add br-net type bridge
$ ip link
$ ip addr add 192.168.55.1/24 brd + dev br-net
$ ip link set br-net up
$ ip netns add ns1
$ ip netns add ns2
$ ip link add veth-ns1 type veth peer name br-ns1
$ ip link add veth-ns2 type veth peer name br-ns2
$ ip link set veth-ns1 netns ns1
$ ip link set veth-ns2 netns ns2
$ ip link set br-ns1 master br-net
$ ip link set br-ns2 master br-net
$ ip -n ns1 addr add 192.168.55.2/24 dev veth-ns1
$ ip -n ns2 addr add 192.168.55.3/24 dev veth-ns2
$ ip -n ns1 link set veth-ns1 up
$ ip -n ns2 link set veth-ns2 up
$ ip link set dev br-ns1 up
$ ip link set dev br-ns2 up
$ ip -n ns1 route add default via 192.168.55.1 dev veth-ns1 
$ ip -n ns2 route add default via 192.168.55.1 dev veth-ns2
$ sysctl -w net.ipv4.ip_forward=1
$ iptables -t nat -A POSTROUTING -s 192.168.55.0/24 ! -o br-net -j MASQUERADE

Control groups (cgroups) fundamentals

$ mkdir -p /mycg/{memory,cpusets,cpu}
$ mount -t cgroup -o memory none /mycg/memory
$ mount -t cgroup -o cpu,cpuacct none /mycg/cpu
$ mount -t cgroup -o cpuset none /mycg/cpusets
mkdir -p /mycg/cpu/user{1..3}
# 2048 / (2048 + 512 + 80) = 77%
$ echo 2048 > /mycg/cpu/user1/cpu.shares
# 512 / (2048 + 512 + 80) = 19%
$ echo 512 > /mycg/cpu/user2/cpu.shares
# 80 / (2048 + 512 + 80) = 3%
$ echo 80 > /mycg/cpu/user3/cpu.shares
$ cat /dev/urandom &> /dev/null &
$ PID1=$!
$ cat /dev/urandom &> /dev/null &
$ PID2=$!
$ cat /dev/urandom &> /dev/null &
$ PID2=$!
$ echo $PID1 > /mycg/cpu/user1/tasks
$ echo $PID2 > /mycg/cpu/user2/tasks
$ echo $PID3 > /mycg/cpu/user3/tasks
$ top

Create a container from scratch

$ docker pull debian
$ docker save debian -o debian.tar
$ mkdir debian_layer
$ mkdir -p fs/{lower,upper,work,merged}
$ tar xf debian.tar -C debian_layer
$ find debian_layer -name 'layer.tar' -exec tar xf {} -C fs/lower \;
$ ip netns add cnt
$ ip link add br-cnt type bridge
$ ip addr add 192.168.22.1/24 brd + dev br-cnt
$ ip link set br-cnt up
$ sysctl -w net.ipv4.ip_forward=1
$ iptables -t nat -I POSTROUTING 1 -s 192.168.22.0/24 ! -o br-cnt -j MASQUERADE
$ mount -vt overlay -o lowerdir=./fs/lower,upperdir=./fs/upper,workdir=./fs/work none ./fs/merged
$ mount -v --bind /dev ./fs/merged/dev
$ unshare --fork --pid --net=/var/run/netns/cnt chroot ./fs/merged \
/usr/bin/env -i PATH=/bin:/usr/bin:/sbin:/usr/sbin TERM="$TERM" \
/bin/bash --login +h
# Mount proc within container
$ mount -vt proc proc /proc
$ ip link add veth-cnt type veth peer name br-veth-cnt
$ ip link set veth-cnt netns cnt
$ ip link set br-veth-cnt master br-cnt
$ ip link set br-veth-cnt up
$ ip -n cnt addr add 192.168.22.2/24 dev veth-cnt
$ ip -n cnt link set lo up
$ ip -n cnt link set veth-cnt up
$ ip -n cnt route add default via 192.168.22.1 dev veth-cnt
$ ip netns exec cnt ping -c 3 1.1.1.1
$ mkdir /sys/fs/cgroup/memory/cnt
$ echo 10000000 > /sys/fs/cgroup/memory/cnt/memory.limit_in_bytes
$ echo 0 > /sys/fs/cgroup/memory/cnt/memory.swappiness
$ CHILD_PID=$(lsns -t pid | grep "[/]bin/bash --login +h" | awk '{print $4}')
$ echo $CHILD_PID > /sys/fs/cgroup/memory/cnt/tasks
$ apt update
$ apt install nginx procps curl -y
$ nginx
$ curl 127.0.0.1:80
$ curl 192.168.22.2:80 # from host
$ cat <( </dev/zero head -c 15m) <(sleep 15) | tail
$ umount /proc # within container
$ exit # within container
$ umount -R ./fs/merged
$ ip link del br-veth-cnt
$ ip link del br-cnt
$ ip netns del cnt # grep cnt /proc/mounts

Inspect Namespaces within a docker container

Install docker CE

$ curl -fsSL https://get.docker.com -o install_docker.sh
$ less install_docker.sh # optional
$ sh install_docker.sh
$ usermod -aG docker $USER
$ newgrp docker # Or logout and login

Inspect Docker Network

$ docker network create mynet
$ BR_NAME=$(ip link | grep -v '@' | awk '/br-/{gsub(":",""); print $2}')
$ ip addr show ${BR_NAME}
$ docker network inspect mynet | grep Subnet
$ docker run --name nginx --net mynet -d --rm -p 8080:80 nginx
$ CONTAINER_ID=$(docker container ps | awk '/nginx/{print $1}')
$ CONTAINER_PID=$(docker inspect -f '{{.State.Pid}}' ${CONTAINER_ID})
$ mkdir -p /var/run/netns/
$ ln -sfT /proc/${CONTAINER_PID}/ns/net /var/run/netns/${CONTAINER_ID}
$ ip netns list
$ ip -n ${CONTAINER_ID} link show eth0
$ ip -n ${CONTAINER_ID} addr show eth0
$ docker container inspect nginx | grep IPAddress
$ iptables -t nat -nvL

Inspect cgroups in a docker container

$ docker run --name test_cg --memory=10m --cpus=.1 -it --rm ubuntu
$ CONTAINER_ID=$(docker container ps --no-trunc | awk '/test_cg/{print $1}')
$ tree /sys/fs/cgroup/{memory,cpu}/docker/${CONTAINER_ID}
$ docker container top test_cg | tail -n 1 | awk '{print $2}' # container parent PID
$ cat /sys/fs/cgroup/{memory,cpu}/docker/${CONTAINER_ID}/tasks # the same as container parent PID
$ docker container stats test_cg
$ cat /dev/urandom &> /dev/null
$ cat <( </dev/zero head -c 50m) <(sleep 30) | tail

Inspect overlay fs in a docker container

$ docker run --name test_overlayfs -it --rm debian
$ docker container inspect test_overlayfs -f '{{.GraphDriver.Data.LowerDir}}' | awk 'BEGIN{FS=":"}{for (i=1; i<= NF; i++) print $i}' | while read low; do tree -L 2 $low; done | less
$ docker container inspect test_overlayfs -f '{{.GraphDriver.Data.UpperDir}}' | while read upper; do tree $upper; done | less
$ apt update && apt install nmap -y
$ docker container inspect test_overlayfs -f '{{.GraphDriver.Data.UpperDir}}' | while read upper; do tree $upper; done | less

Inspect docker process namespace

$ docker run --name test_ps -it --rm ubuntu
$ sleep 600 &
$ sleep 600 &
$ sleep 600 &
$ sleep 600 &
$ sleep 600 &
$ top
$ CONTAINER_PID=$(docker container top test_ps | sed -n '2p' | awk '{print $2}')
$ ps f -g ${CONTAINER_PID}
$ lsns -t pid
$ docker container top test_ps

Conclusion

--

--

--

Engineer || MSc student || DevOps in progress

Love podcasts or audiobooks? Learn on the go with our new app.

Recommended from Medium

4 Proper Advantages you receive if you outsource Mobile app development

Top Platforms to launch your next SaaS Project

Go Fibonacci Go

KOtlin Dependency Injection (KODI)

Python Tutorials

Maximizing Your Salesforce Investment Through Integration

3 Commands to Up Your Git Game

How I Became a Developer Advocate

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Ivan Moreno

Ivan Moreno

Engineer || MSc student || DevOps in progress

More from Medium

Managing DNS Servers with Ansible and Jenkins (Unbound, BIND)

How to mount a host directory in a Docker container? — Devops Mania

Introduction to Docker and Kubernetes: Part I

How to scan vulnerabilities for Docker container images — Part1