Seaborn fails to plot heatmap for a particular feature (titanic dataset)

Issue

I am working with some neural networks and I am struggling to plot a correlation heatmap for the titanic dataset using seaborn. To be concise: it seems that there is a problem with the ‘n_siblings_spouses’ features during the plotting. I don’t know if the problem is due to the feature itself (spacing, maybe?) or if there is an intrinsic issue with seaborn.

Would it be possible to solve the issue without the need to remove the feature from the dataset?

Here is a MWE. And thanks in advance!

from __future__ import absolute_import,division,print_function,unicode_literals
import numpy as np 
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc, font_manager
%matplotlib inline

from IPython.display import clear_output
from six.moves import urllib
import tensorflow.compat.v2.feature_column as fc 
import tensorflow as tf 
import seaborn as sns

rc('text', usetex=True)
matplotlib.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}']

# only if needed
#!apt install texlive-fonts-recommended texlive-fonts-extra cm-super dvipng
plt.rc('font', family='serif')

# URL address of data
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"

# Downloading data
train_file_path = tf.keras.utils.get_file("train.csv", TRAIN_DATA_URL)

# Setting numpy default values.
np.set_printoptions(precision=3, suppress=True)

# Reading data
data_train = pd.read_csv(train_file_path)

print("\n TRAIN DATA SET")
print(data_train.head(),"\n")

def heatMap(df):
    #Create Correlation df
    corr = df.corr()
    #Plot figsize
    fig, ax = plt.subplots(figsize=(10, 10))
    #Generate Color Map
    colormap = sns.diverging_palette(220, 10, as_cmap=True)
    #Generate Heat Map, allow annotations and place floats in map
    sns.heatmap(corr, cmap=colormap, annot=True, fmt=".2f")
    #Apply xticks
    plt.xticks(range(len(corr.columns)), corr.columns);
    #Apply yticks
    plt.yticks(range(len(corr.columns)), corr.columns)
    #show plot
    plt.show()

heatMap(data_train)

Here is the issue that is raised when trying to execute the heatMap function (I am working in Colab. However, this also happens in console):

---------------------------------------------------------------------------
CalledProcessError                        Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/matplotlib/texmanager.py in _run_checked_subprocess(self, command, tex)
    305                                              cwd=self.texcache,
--> 306                                              stderr=subprocess.STDOUT)
    307         except FileNotFoundError as exc:

22 frames
CalledProcessError: Command '['latex', '-interaction=nonstopmode', '--halt-on-error', '/root/.cache/matplotlib/tex.cache/bf616eae1512bede263889c8e1d8fb21.tex']' returned non-zero exit status 1.

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/matplotlib/texmanager.py in _run_checked_subprocess(self, command, tex)
    317                     prog=command[0],
    318                     tex=tex.encode('unicode_escape'),
--> 319                     exc=exc.output.decode('utf-8'))) from exc
    320         _log.debug(report)
    321         return report

RuntimeError: latex was not able to process the following string:
b'n_siblings_spouses'

Here is the full report generated by latex:
This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(/root/.cache/matplotlib/tex.cache/bf616eae1512bede263889c8e1d8fb21.tex
LaTeX2e <2017-04-15>
Babel <3.18> and hyphenation patterns for 3 language(s) loaded.
(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls
Document Class: article 2014/09/29 v1.4h Standard LaTeX document class
(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo))
(/usr/share/texlive/texmf-dist/tex/latex/type1cm/type1cm.sty)
(/usr/share/texmf/tex/latex/cm-super/type1ec.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/t1cmr.fd))
(/usr/share/texlive/texmf-dist/tex/latex/base/textcomp.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.def))
(/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def
(/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.dfu)))
(/usr/share/texlive/texmf-dist/tex/latex/geometry/geometry.sty
(/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty)
(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifpdf.sty)
(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifvtex.sty)
(/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty)

Package geometry Warning: Over-specification in `h'-direction.
    `width' (5058.9pt) is ignored.


Package geometry Warning: Over-specification in `v'-direction.
    `height' (5058.9pt) is ignored.

) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty
For additional information on amsmath, use the `?' option.
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty))
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty)
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty))
(./bf616eae1512bede263889c8e1d8fb21.aux)
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1cmr.fd)
*geometry* driver: auto-detecting
*geometry* detected driver: dvips
! Missing $ inserted.
<inserted text> 
                $
l.19 {\rmfamily n_
                  siblings_spouses}
No pages of output.
Transcript written on bf616eae1512bede263889c8e1d8fb21.log.


---------------------------------------------------------------------------
CalledProcessError                        Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/matplotlib/texmanager.py in _run_checked_subprocess(self, command, tex)
    305                                              cwd=self.texcache,
--> 306                                              stderr=subprocess.STDOUT)
    307         except FileNotFoundError as exc:

21 frames
CalledProcessError: Command '['latex', '-interaction=nonstopmode', '--halt-on-error', '/root/.cache/matplotlib/tex.cache/bf616eae1512bede263889c8e1d8fb21.tex']' returned non-zero exit status 1.

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/matplotlib/texmanager.py in _run_checked_subprocess(self, command, tex)
    317                     prog=command[0],
    318                     tex=tex.encode('unicode_escape'),
--> 319                     exc=exc.output.decode('utf-8'))) from exc
    320         _log.debug(report)
    321         return report

RuntimeError: latex was not able to process the following string:
b'n_siblings_spouses'

Here is the full report generated by latex:
This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(/root/.cache/matplotlib/tex.cache/bf616eae1512bede263889c8e1d8fb21.tex
LaTeX2e <2017-04-15>
Babel <3.18> and hyphenation patterns for 3 language(s) loaded.
(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls
Document Class: article 2014/09/29 v1.4h Standard LaTeX document class
(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo))
(/usr/share/texlive/texmf-dist/tex/latex/type1cm/type1cm.sty)
(/usr/share/texmf/tex/latex/cm-super/type1ec.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/t1cmr.fd))
(/usr/share/texlive/texmf-dist/tex/latex/base/textcomp.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.def))
(/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def
(/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.dfu)))
(/usr/share/texlive/texmf-dist/tex/latex/geometry/geometry.sty
(/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty)
(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifpdf.sty)
(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifvtex.sty)
(/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty)

Package geometry Warning: Over-specification in `h'-direction.
    `width' (5058.9pt) is ignored.


Package geometry Warning: Over-specification in `v'-direction.
    `height' (5058.9pt) is ignored.

) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty
For additional information on amsmath, use the `?' option.
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty))
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty)
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty))
(./bf616eae1512bede263889c8e1d8fb21.aux)
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1cmr.fd)
*geometry* driver: auto-detecting
*geometry* detected driver: dvips
! Missing $ inserted.
<inserted text> 
                $
l.19 {\rmfamily n_
                  siblings_spouses}
No pages of output.
Transcript written on bf616eae1512bede263889c8e1d8fb21.log.


<Figure size 720x720 with 2 Axes>

Solution

To solve this problem, I came across this information that Colab needs a Tex-related module. There was also an excellent answer to SO.

You will need to install the following

  • ! sudo apt-get install texlive-latex-recommended
  • ! sudo apt-get install dvipng texlive-fonts-recommended
  • ! wget http://mirrors.ctan.org/macros/latex/contrib/type1cm.zip
  • ! unzip type1cm.zip -d /tmp/type1cm
  • ! cd /tmp/type1cm/type1cm/ && sudo latex type1cm.ins
  • ! sudo mkdir /usr/share/texmf/tex/latex/type1cm
  • ! sudo cp /tmp/type1cm/type1cm/type1cm.sty /usr/share/texmf/tex/latex/type1cm
  • ! sudo texhash
  • ! sudo apt install cm-super
from __future__ import absolute_import,division,print_function,unicode_literals
import numpy as np 
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# from matplotlib import rc, font_manager
%matplotlib inline

from IPython.display import clear_output
from six.moves import urllib
import tensorflow.compat.v2.feature_column as fc 
import tensorflow as tf 
import seaborn as sns

# rc('text', usetex=True)
# matplotlib.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}']

# only if needed
#!apt install texlive-fonts-recommended texlive-fonts-extra cm-super dvipng
# plt.rc('font', family='serif')

# URL address of data
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"

# Downloading data
train_file_path = tf.keras.utils.get_file("/content/sample_data/train.csv", TRAIN_DATA_URL)

# Setting numpy default values.
np.set_printoptions(precision=3, suppress=True)

# Reading data
data_train = pd.read_csv(train_file_path)

print("\n TRAIN DATA SET")
print(data_train.head(),"\n")

def heatMap(df):
    #Create Correlation df
    corr = df.corr()
    print(corr)
    #Plot figsize
    fig, ax = plt.subplots(figsize=(10, 10))
    #Generate Color Map
    colormap = sns.diverging_palette(220, 10, as_cmap=True)
    #Generate Heat Map, allow annotations and place floats in map
    sns.heatmap(corr, cmap=colormap, annot=True, fmt=".2f")
    #Apply xticks
    plt.xticks(range(len(corr.columns)), corr.columns);
    #Apply yticks
    plt.yticks(range(len(corr.columns)), corr.columns)
    #show plot
    plt.show()

heatMap(data_train)

enter image description here

Answered By – r-beginners

This Answer collected from stackoverflow, is licensed under cc by-sa 2.5 , cc by-sa 3.0 and cc by-sa 4.0

Leave a Reply

(*) Required, Your email will not be published