Skip to content

feature

Feature representation and feature sets.

Feature dataclass

Represents a single feature with metadata.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| name | str | Feature name |
| dtype | FeatureType | Data type of feature |
| origin | FeatureOrigin | How the feature was created |
| source_columns | list | Original columns used to create this feature |
| transformation | str | Description of transformation applied |
| explanation | str, optional | Human-readable explanation of the feature |
| code | str, optional | Python code that generates this feature |
| importance | float, optional | Feature importance score |
| metadata | dict | Additional metadata |

Source code in featcopilot/core/feature.py
@dataclass
class Feature:
    """
    Represents a single feature with metadata.

    Attributes
    ----------
    name : str
        Feature name
    dtype : FeatureType
        Data type of feature
    origin : FeatureOrigin
        How the feature was created
    source_columns : list
        Original columns used to create this feature. When left empty it is
        set to ``[name]`` after initialisation.
    transformation : str
        Description of transformation applied
    explanation : str, optional
        Human-readable explanation of the feature
    code : str, optional
        Python code that generates this feature. The code is executed with
        ``df``, ``np`` and ``pd`` available and must assign its output to a
        variable called ``result``.
    importance : float, optional
        Feature importance score
    metadata : dict
        Additional metadata
    """

    name: str
    dtype: FeatureType = FeatureType.NUMERIC
    origin: FeatureOrigin = FeatureOrigin.ORIGINAL
    source_columns: list[str] = field(default_factory=list)
    transformation: str = ""
    explanation: Optional[str] = None
    code: Optional[str] = None
    importance: Optional[float] = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # A feature declared without inputs is recorded as coming from the
        # column that carries its own name.
        if not self.source_columns:
            self.source_columns = [self.name]

    def to_dict(self) -> dict[str, Any]:
        """Convert feature to dictionary (enums are stored by their ``value``)."""
        return {
            "name": self.name,
            "dtype": self.dtype.value,
            "origin": self.origin.value,
            "source_columns": self.source_columns,
            "transformation": self.transformation,
            "explanation": self.explanation,
            "code": self.code,
            "importance": self.importance,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Feature":
        """Create feature from dictionary; only the ``"name"`` key is required."""
        return cls(
            name=data["name"],
            dtype=FeatureType(data.get("dtype", "numeric")),
            origin=FeatureOrigin(data.get("origin", "original")),
            source_columns=data.get("source_columns", []),
            transformation=data.get("transformation", ""),
            explanation=data.get("explanation"),
            code=data.get("code"),
            importance=data.get("importance"),
            metadata=data.get("metadata", {}),
        )

    def compute(self, df: pd.DataFrame) -> pd.Series:
        """
        Compute feature values from DataFrame using stored code.

        The stored code runs with ``df``, ``np`` and ``pd`` in scope and no
        builtins, and must bind its output to the name ``result``.

        Parameters
        ----------
        df : DataFrame
            Input data

        Returns
        -------
        Series
            Computed feature values (whatever the code bound to ``result``)

        Raises
        ------
        ValueError
            If the feature has no code, or the code ran without assigning
            ``result``.
        """
        if not self.code:
            raise ValueError(f"No code defined for feature {self.name}")

        # One dict serves as both globals and locals. With two separate dicts
        # exec() behaves like a class body, so lambdas and comprehensions in
        # the stored code cannot resolve df/np/pd and fail with NameError.
        # SECURITY NOTE: an empty __builtins__ is not a sandbox; only execute
        # code that comes from a trusted source.
        namespace = {"__builtins__": {}, "df": df, "np": np, "pd": pd}
        exec(self.code, namespace)
        if "result" not in namespace:
            raise ValueError(f"Code for feature {self.name} did not assign 'result'")
        return namespace["result"]

compute(df)

Compute feature values from DataFrame using stored code.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| df | DataFrame | Input data | required |

Returns:

| Type | Description |
| --- | --- |
| Series | Computed feature values |

Source code in featcopilot/core/feature.py
def compute(self, df: pd.DataFrame) -> pd.Series:
    """
    Compute feature values from DataFrame using stored code.

    The stored code runs with ``df``, ``np`` and ``pd`` in scope and no
    builtins, and must bind its output to the name ``result``.

    Parameters
    ----------
    df : DataFrame
        Input data

    Returns
    -------
    Series
        Computed feature values (whatever the code bound to ``result``)

    Raises
    ------
    ValueError
        If the feature has no code, or the code ran without assigning
        ``result``.
    """
    if not self.code:
        raise ValueError(f"No code defined for feature {self.name}")

    # One dict serves as both globals and locals. With two separate dicts
    # exec() behaves like a class body, so lambdas and comprehensions in the
    # stored code cannot resolve df/np/pd and fail with NameError.
    # SECURITY NOTE: an empty __builtins__ is not a sandbox; only execute
    # code that comes from a trusted source.
    namespace = {"__builtins__": {}, "df": df, "np": np, "pd": pd}
    exec(self.code, namespace)
    if "result" not in namespace:
        raise ValueError(f"Code for feature {self.name} did not assign 'result'")
    return namespace["result"]

from_dict(data) classmethod

Create feature from dictionary.

Source code in featcopilot/core/feature.py
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "Feature":
    """
    Build a feature from a plain dictionary.

    Parameters
    ----------
    data : dict
        Mapping of field names to values. ``"name"`` is required; every other
        key falls back to a default when absent.

    Returns
    -------
    Feature
        Instance of ``cls`` populated from ``data``
    """
    # Required key first, so a missing name surfaces as KeyError before
    # anything else is parsed.
    name = data["name"]
    # Enum fields are looked up from their string values.
    dtype = FeatureType(data.get("dtype", "numeric"))
    origin = FeatureOrigin(data.get("origin", "original"))

    # The remaining fields are passed through untouched.
    passthrough = {
        "source_columns": data.get("source_columns", []),
        "transformation": data.get("transformation", ""),
        "explanation": data.get("explanation"),
        "code": data.get("code"),
        "importance": data.get("importance"),
        "metadata": data.get("metadata", {}),
    }
    return cls(name=name, dtype=dtype, origin=origin, **passthrough)

to_dict()

Convert feature to dictionary.

Source code in featcopilot/core/feature.py
def to_dict(self) -> dict[str, Any]:
    """
    Serialise the feature into a plain dictionary.

    Returns
    -------
    dict
        One entry per field; ``dtype`` and ``origin`` are stored as the
        enum's ``value``, all other fields are stored as-is (not copied).
    """
    serialized: dict[str, Any] = {"name": self.name}
    # Enums are flattened to their underlying values.
    serialized["dtype"] = self.dtype.value
    serialized["origin"] = self.origin.value
    serialized.update(
        source_columns=self.source_columns,
        transformation=self.transformation,
        explanation=self.explanation,
        code=self.code,
        importance=self.importance,
        metadata=self.metadata,
    )
    return serialized

FeatureOrigin

Bases: Enum

Origin/source of feature.

Source code in featcopilot/core/feature.py
class FeatureOrigin(Enum):
    """
    Origin/source of feature.

    Each member's value is the lowercase string form of its name.
    """

    # Original input feature
    ORIGINAL = "original"
    # Polynomial transformation
    POLYNOMIAL = "polynomial"
    # Interaction between features
    INTERACTION = "interaction"
    # Aggregation operation
    AGGREGATION = "aggregation"
    # Time series extraction
    TIMESERIES = "timeseries"
    # Generated by LLM
    LLM_GENERATED = "llm_generated"
    # Suggested by LLM, implemented traditionally
    LLM_SUGGESTED = "llm_suggested"
    # Custom user-defined
    CUSTOM = "custom"

FeatureSet

Collection of features with operations for manipulation.

Provides methods for adding, removing, filtering, and combining features.

Source code in featcopilot/core/feature.py
class FeatureSet:
    """
    Collection of features with operations for manipulation.

    Features are kept in a dict keyed by ``feature.name``, so adding a feature
    whose name is already present replaces the earlier entry.
    Provides methods for adding, removing, filtering, and combining features.
    """

    def __init__(self, features: Optional[list[Feature]] = None):
        self._features: dict[str, Feature] = {}
        # ``None`` and an empty list both mean "start empty".
        for item in features or ():
            self.add(item)

    def __len__(self) -> int:
        """Number of stored features."""
        return len(self._features)

    def __iter__(self):
        """Iterate over the stored features (not their names)."""
        return iter(self._features.values())

    def __contains__(self, name: str) -> bool:
        """Whether a feature with this name is stored."""
        return name in self._features

    def __getitem__(self, name: str) -> Feature:
        """Return the feature stored under ``name``; KeyError if absent."""
        return self._features[name]

    def add(self, feature: Feature) -> None:
        """Store a feature under its name, replacing any existing entry."""
        key = feature.name
        self._features[key] = feature

    def remove(self, name: str) -> Optional[Feature]:
        """Drop the feature called ``name`` and return it, or None if absent."""
        removed = self._features.pop(name, None)
        return removed

    def get(self, name: str) -> Optional[Feature]:
        """Return the feature called ``name``, or None if absent."""
        registry = self._features
        return registry[name] if name in registry else None

    def get_names(self) -> list[str]:
        """Return the names of all stored features, in storage order."""
        return list(self._features)

    def filter_by_origin(self, origin: FeatureOrigin) -> "FeatureSet":
        """Return a new set holding only features whose origin equals ``origin``."""
        matching = [feat for feat in self._features.values() if feat.origin == origin]
        return FeatureSet(matching)

    def filter_by_type(self, dtype: FeatureType) -> "FeatureSet":
        """Return a new set holding only features whose dtype equals ``dtype``."""
        matching = [feat for feat in self._features.values() if feat.dtype == dtype]
        return FeatureSet(matching)

    def filter_by_importance(self, min_importance: float) -> "FeatureSet":
        """Return a new set of features scored at or above ``min_importance``.

        Features without an importance score are left out.
        """

        def meets_threshold(feat) -> bool:
            return feat.importance is not None and feat.importance >= min_importance

        return FeatureSet([feat for feat in self._features.values() if meets_threshold(feat)])

    def sort_by_importance(self, descending: bool = True) -> list[Feature]:
        """Return the scored features ordered by importance.

        Features without an importance score are left out.
        """
        scored = [feat for feat in self._features.values() if feat.importance is not None]
        scored.sort(key=lambda feat: feat.importance or 0, reverse=descending)
        return scored

    def merge(self, other: "FeatureSet") -> "FeatureSet":
        """Return a new set with this set's features followed by ``other``'s.

        On a name clash the feature from ``other`` wins.
        """
        return FeatureSet([*self._features.values(), *other])

    def to_dataframe(self) -> pd.DataFrame:
        """Return one DataFrame row per feature, built from ``to_dict()``."""
        rows = [feat.to_dict() for feat in self._features.values()]
        return pd.DataFrame(rows)

    def get_explanations(self) -> dict[str, str]:
        """Map feature name to explanation, skipping features without one."""
        explained = {}
        for feat in self._features.values():
            text = feat.explanation
            if text:
                explained[feat.name] = text
        return explained

    def compute_all(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Compute all features that have code defined.

        Features whose name is already a column are skipped, and a feature
        that fails to compute is logged and skipped rather than raised.

        Parameters
        ----------
        df : DataFrame
            Input data (not modified; every feature is computed from it)

        Returns
        -------
        DataFrame
            Copy of ``df`` with one extra column per computed feature
        """
        result = df.copy()
        for feature in self._features.values():
            # Nothing to execute, or the column already exists.
            if not feature.code or feature.name in result.columns:
                continue
            try:
                result[feature.name] = feature.compute(df)
            except Exception as e:
                # Best effort: report the failure and move on.
                logger.warning(f"Could not compute feature {feature.name}: {e}")
        return result

add(feature)

Add a feature to the set.

Source code in featcopilot/core/feature.py
def add(self, feature: Feature) -> None:
    """Store a feature under its name, replacing any existing entry."""
    key = feature.name
    self._features[key] = feature

compute_all(df)

Compute all features that have code defined.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| df | DataFrame | Input data | required |

Returns:

| Type | Description |
| --- | --- |
| DataFrame | DataFrame with computed features |

Source code in featcopilot/core/feature.py
def compute_all(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute all features that have code defined.

    Features whose name is already a column are skipped, and a feature that
    fails to compute is logged and skipped rather than raised.

    Parameters
    ----------
    df : DataFrame
        Input data (not modified; every feature is computed from it)

    Returns
    -------
    DataFrame
        Copy of ``df`` with one extra column per computed feature
    """
    result = df.copy()
    for feature in self._features.values():
        # Nothing to execute, or the column already exists.
        if not feature.code or feature.name in result.columns:
            continue
        try:
            result[feature.name] = feature.compute(df)
        except Exception as e:
            # Best effort: report the failure and move on.
            logger.warning(f"Could not compute feature {feature.name}: {e}")
    return result

filter_by_importance(min_importance)

Filter features by minimum importance.

Source code in featcopilot/core/feature.py
def filter_by_importance(self, min_importance: float) -> "FeatureSet":
    """Return a new set of features scored at or above ``min_importance``.

    Features without an importance score are left out.
    """

    def meets_threshold(feat) -> bool:
        return feat.importance is not None and feat.importance >= min_importance

    return FeatureSet([feat for feat in self._features.values() if meets_threshold(feat)])

filter_by_origin(origin)

Filter features by origin.

Source code in featcopilot/core/feature.py
def filter_by_origin(self, origin: FeatureOrigin) -> "FeatureSet":
    """Return a new set holding only features whose origin equals ``origin``."""
    matching = [feat for feat in self._features.values() if feat.origin == origin]
    return FeatureSet(matching)

filter_by_type(dtype)

Filter features by data type.

Source code in featcopilot/core/feature.py
def filter_by_type(self, dtype: FeatureType) -> "FeatureSet":
    """Return a new set holding only features whose dtype equals ``dtype``."""
    matching = [feat for feat in self._features.values() if feat.dtype == dtype]
    return FeatureSet(matching)

get(name)

Get a feature by name.

Source code in featcopilot/core/feature.py
def get(self, name: str) -> Optional[Feature]:
    """Return the feature called ``name``, or None if absent."""
    registry = self._features
    return registry[name] if name in registry else None

get_explanations()

Get explanations for all features that have them.

Source code in featcopilot/core/feature.py
def get_explanations(self) -> dict[str, str]:
    """Map feature name to explanation, skipping features without one."""
    explained = {}
    for feat in self._features.values():
        text = feat.explanation
        # None and the empty string both count as "no explanation".
        if text:
            explained[feat.name] = text
    return explained

get_names()

Get all feature names.

Source code in featcopilot/core/feature.py
def get_names(self) -> list[str]:
    """Return the names of all stored features, in storage order."""
    # Iterating a dict yields its keys, so no explicit ``.keys()`` is needed.
    return list(self._features)

merge(other)

Merge with another feature set.

Source code in featcopilot/core/feature.py
def merge(self, other: "FeatureSet") -> "FeatureSet":
    """Return a new set with this set's features followed by ``other``'s.

    Neither input is modified; on a name clash the feature from ``other``
    replaces the one from this set.
    """
    combined = [*self._features.values(), *other]
    return FeatureSet(combined)

remove(name)

Remove and return a feature by name.

Source code in featcopilot/core/feature.py
def remove(self, name: str) -> Optional[Feature]:
    """Drop the feature called ``name`` and return it, or None if absent."""
    removed = self._features.pop(name, None)
    return removed

sort_by_importance(descending=True)

Sort features by importance.

Source code in featcopilot/core/feature.py
def sort_by_importance(self, descending: bool = True) -> list[Feature]:
    """Return the scored features ordered by importance.

    Features without an importance score are left out; ``descending``
    selects highest-first (the default) or lowest-first order.
    """
    scored = [feat for feat in self._features.values() if feat.importance is not None]
    scored.sort(key=lambda feat: feat.importance or 0, reverse=descending)
    return scored

to_dataframe()

Convert feature set to DataFrame with metadata.

Source code in featcopilot/core/feature.py
def to_dataframe(self) -> pd.DataFrame:
    """Return one DataFrame row per feature, built from each ``to_dict()``."""
    rows = [feat.to_dict() for feat in self._features.values()]
    return pd.DataFrame(rows)

FeatureType

Bases: Enum

Types of features.

Source code in featcopilot/core/feature.py
class FeatureType(Enum):
    """
    Types of features.

    Each member's value is the lowercase string form of its name.
    """

    NUMERIC = "numeric"
    CATEGORICAL = "categorical"
    DATETIME = "datetime"
    TEXT = "text"
    BOOLEAN = "boolean"