Strip Plot with Jitter#

In this chart, we encode the Major_Genre column from the movies dataset in the y-channel. In the default presentation of this data, it would be difficult to gauge the relative frequencies with which different values occur because there would be so much overlap. To address this, we use the yOffset channel to incorporate a random offset (jittering). The example is shown twice, on the left side using normally distributed and on the right side using uniformally distributed jitter.

import altair as alt
from vega_datasets import data

source = data.movies.url

gaussian_jitter = alt.Chart(source, title='Normally distributed jitter').mark_circle(size=8).encode(
    y="Major_Genre:N",
    x="IMDB_Rating:Q",
    yOffset="jitter:Q",
    color=alt.Color('Major_Genre:N').legend(None)
).transform_calculate(
    # Generate Gaussian jitter with a Box-Muller transform
    jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
)

uniform_jitter = gaussian_jitter.transform_calculate(
    # Generate uniform jitter
    jitter='random()'
).encode(
    alt.Y('Major_Genre:N').axis(None)
).properties(
    title='Uniformly distributed jitter'
)

(gaussian_jitter | uniform_jitter).resolve_scale(yOffset='independent')
import altair as alt
from vega_datasets import data

source = data.movies.url

gaussian_jitter = alt.Chart(source, title='Normally distributed jitter').mark_circle(size=8).encode(
    y="Major_Genre:N",
    x="IMDB_Rating:Q",
    yOffset="jitter:Q",
    color=alt.Color('Major_Genre:N', legend=None)
).transform_calculate(
    # Generate Gaussian jitter with a Box-Muller transform
    jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
)

uniform_jitter = gaussian_jitter.transform_calculate(
    # Generate uniform jitter
    jitter='random()'
).encode(
    y=alt.Y('Major_Genre:N', axis=None)
).properties(
    title='Uniformly distributed jitter'
)

(gaussian_jitter | uniform_jitter).resolve_scale(yOffset='independent')