Instructions to use juanwisz/modernbert-python-code-retrieval with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use juanwisz/modernbert-python-code-retrieval with sentence-transformers:

from sentence_transformers import SentenceTransformer

# Instantiate the embedding model from its repository id
# ("<owner>/<model-name>"); presumably this fetches the weights on first use
# -- confirm against the sentence-transformers documentation.
model = SentenceTransformer("juanwisz/modernbert-python-code-retrieval")

# Sample inputs for the model: one natural-language docstring followed by
# three Python function sources. Entry 0 is the docstring text of entry 2
# (`clip_out_of_image`); entries 1 and 3 are functions from other code bases.
# Every entry is a single-line string literal that uses "\n" escapes, so no
# literal may be physically broken across source lines.
sentences = [
    # Query-style input: docstring text only, no code.
    "Clip off all parts from all bounding boxes that are outside of the image.\n\n        Returns\n        -------\n        imgaug.BoundingBoxesOnImage\n            Bounding boxes, clipped to fall within the image dimensions.",
    # Function source: `model_best`.
    "def model_best(y1, y2, samples=1000, progressbar=True):\n    \"\"\"\n    Bayesian Estimation Supersedes the T-Test\n\n    This model runs a Bayesian hypothesis comparing if y1 and y2 come\n    from the same distribution. Returns are assumed to be T-distributed.\n\n    In addition, computes annual volatility and Sharpe of in and\n    out-of-sample periods.\n\n    This model replicates the example used in:\n    Kruschke, John. (2012) Bayesian estimation supersedes the t\n    test. Journal of Experimental Psychology: General.\n\n    Parameters\n    ----------\n    y1 : array-like\n        Array of returns (e.g. in-sample)\n    y2 : array-like\n        Array of returns (e.g. out-of-sample)\n    samples : int, optional\n        Number of posterior samples to draw.\n\n    Returns\n    -------\n    model : pymc.Model object\n        PyMC3 model containing all random variables.\n    trace : pymc3.sampling.BaseTrace object\n        A PyMC3 trace object that contains samples for each parameter\n        of the posterior.\n\n    See Also\n    --------\n    plot_stoch_vol : plotting of tochastic volatility model\n    \"\"\"\n\n    y = np.concatenate((y1, y2))\n\n    mu_m = np.mean(y)\n    mu_p = 0.000001 * 1 / np.std(y)**2\n\n    sigma_low = np.std(y) / 1000\n    sigma_high = np.std(y) * 1000\n    with pm.Model() as model:\n        group1_mean = pm.Normal('group1_mean', mu=mu_m, tau=mu_p,\n                                testval=y1.mean())\n        group2_mean = pm.Normal('group2_mean', mu=mu_m, tau=mu_p,\n                                testval=y2.mean())\n        group1_std = pm.Uniform('group1_std', lower=sigma_low,\n                                upper=sigma_high, testval=y1.std())\n        group2_std = pm.Uniform('group2_std', lower=sigma_low,\n                                upper=sigma_high, testval=y2.std())\n        nu = pm.Exponential('nu_minus_two', 1 / 29., testval=4.) + 2.\n\n        returns_group1 = pm.StudentT('group1', nu=nu, mu=group1_mean,\n                                     lam=group1_std**-2, observed=y1)\n        returns_group2 = pm.StudentT('group2', nu=nu, mu=group2_mean,\n                                     lam=group2_std**-2, observed=y2)\n\n        diff_of_means = pm.Deterministic('difference of means',\n                                         group2_mean - group1_mean)\n        pm.Deterministic('difference of stds',\n                         group2_std - group1_std)\n        pm.Deterministic('effect size', diff_of_means /\n                         pm.math.sqrt((group1_std**2 +\n                                       group2_std**2) / 2))\n\n        pm.Deterministic('group1_annual_volatility',\n                         returns_group1.distribution.variance**.5 *\n                         np.sqrt(252))\n        pm.Deterministic('group2_annual_volatility',\n                         returns_group2.distribution.variance**.5 *\n                         np.sqrt(252))\n\n        pm.Deterministic('group1_sharpe', returns_group1.distribution.mean /\n                         returns_group1.distribution.variance**.5 *\n                         np.sqrt(252))\n        pm.Deterministic('group2_sharpe', returns_group2.distribution.mean /\n                         returns_group2.distribution.variance**.5 *\n                         np.sqrt(252))\n\n        trace = pm.sample(samples, progressbar=progressbar)\n    return model, trace",
    # Function source: `clip_out_of_image` (its docstring is entry 0).
    "def clip_out_of_image(self):\n        \"\"\"\n        Clip off all parts from all bounding boxes that are outside of the image.\n\n        Returns\n        -------\n        imgaug.BoundingBoxesOnImage\n            Bounding boxes, clipped to fall within the image dimensions.\n\n        \"\"\"\n        bbs_cut = [bb.clip_out_of_image(self.shape)\n                   for bb in self.bounding_boxes if bb.is_partly_within_image(self.shape)]\n        return BoundingBoxesOnImage(bbs_cut, shape=self.shape)",
    # Function source: `_initPermanence`.
    "def _initPermanence(self, potential, connectedPct):\n    \"\"\"\n    Initializes the permanences of a column. The method\n    returns a 1-D array the size of the input, where each entry in the\n    array represents the initial permanence value between the input bit\n    at the particular index in the array, and the column represented by\n    the 'index' parameter.\n\n    Parameters:\n    ----------------------------\n    :param potential: A numpy array specifying the potential pool of the column.\n                    Permanence values will only be generated for input bits\n                    corresponding to indices for which the mask value is 1.\n    :param connectedPct: A value between 0 or 1 governing the chance, for each\n                         permanence, that the initial permanence value will\n                         be a value that is considered connected.\n    \"\"\"\n    # Determine which inputs bits will start out as connected\n    # to the inputs. Initially a subset of the input bits in a\n    # column's potential pool will be connected. This number is\n    # given by the parameter \"connectedPct\"\n    perm = numpy.zeros(self._numInputs, dtype=realDType)\n    for i in xrange(self._numInputs):\n      if (potential[i] < 1):\n        continue\n\n      if (self._random.getReal64() <= connectedPct):\n        perm[i] = self._initPermConnected()\n      else:\n        perm[i] = self._initPermNonConnected()\n\n    # Clip off low values. Since we use a sparse representation\n    # to store the permanence values this helps reduce memory\n    # requirements.\n    perm[perm < self._synPermTrimThreshold] = 0\n\n    return perm"
]
# Encode all entries of `sentences` in a single call.
embeddings = model.encode(sentences)

# Compare the embeddings against themselves, so the result covers every pair
# of inputs (including each input with itself).
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [4, 4]  -- one row and one column per entry in `sentences`

Notebooks
Google Colab
Kaggle

modernbert-python-code-retrieval

File size: 296 Bytes

b9a978d

{
  "word_embedding_dimension": 768,
  "pooling_mode_cls_token": true,
  "pooling_mode_mean_tokens": false,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}